faster translation endpoint
This commit is contained in:
@@ -5,8 +5,10 @@ import (
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"slices"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.ma-al.com/goc_daniel/b2b/app/db"
|
||||
"git.ma-al.com/goc_daniel/b2b/app/model"
|
||||
@@ -26,27 +28,9 @@ type ProductDescriptionService struct {
|
||||
|
||||
func New() *ProductDescriptionService {
|
||||
return &ProductDescriptionService{
|
||||
db: db.Get(),
|
||||
client: openai.NewClient(option.WithAPIKey("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")),
|
||||
}
|
||||
}
|
||||
|
||||
func isValidXHTML(s string) bool {
|
||||
decoder := xml.NewDecoder(strings.NewReader(s))
|
||||
hasStartTag := false
|
||||
|
||||
for {
|
||||
tok, err := decoder.Token()
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
return hasStartTag
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
if _, ok := tok.(xml.StartElement); ok {
|
||||
hasStartTag = true
|
||||
}
|
||||
db: db.Get(),
|
||||
client: openai.NewClient(option.WithAPIKey("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"),
|
||||
option.WithHTTPClient(&http.Client{Timeout: 300 * time.Second})),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -80,7 +64,7 @@ func (s *ProductDescriptionService) SaveProductDescription(userID uint, productI
|
||||
for i := 0; i < len(mustBeHTML); i++ {
|
||||
if text, exists := updates[mustBeHTML[i]]; exists {
|
||||
if !isValidXHTML(text) {
|
||||
return responseErrors.ErrInvalidHTML
|
||||
return responseErrors.ErrInvalidXHTML
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -136,54 +120,41 @@ func (s *ProductDescriptionService) TranslateProductDescription(userID uint, pro
|
||||
return nil, err
|
||||
}
|
||||
|
||||
request := "Translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website.\n\n"
|
||||
request += "\n"
|
||||
request += "<translation_of_product_description>"
|
||||
request := "Translate to " + lang.ISOCode + " without changing the html structure."
|
||||
request += "\n\n<translation_of_product_description>"
|
||||
request += ProductDescription.Description
|
||||
request += "</translation_of_product_description>"
|
||||
request += "\n"
|
||||
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website."
|
||||
request += "\n"
|
||||
request += "<translation_of_product_short_description>"
|
||||
request += "</translation_of_product_description>\n\n"
|
||||
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
|
||||
request += "\n\n<translation_of_product_short_description>"
|
||||
request += ProductDescription.DescriptionShort
|
||||
request += "</translation_of_product_short_description>"
|
||||
request += "\n"
|
||||
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website."
|
||||
request += "\n"
|
||||
request += "<translation_of_product_meta_description>"
|
||||
request += "</translation_of_product_short_description>\n\n"
|
||||
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
|
||||
request += "\n\n<translation_of_product_meta_description>"
|
||||
request += ProductDescription.MetaDescription
|
||||
request += "</translation_of_product_meta_description>"
|
||||
request += "\n"
|
||||
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website."
|
||||
request += "\n"
|
||||
request += "<translation_of_product_meta_title>"
|
||||
request += "</translation_of_product_meta_description>\n\n"
|
||||
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
|
||||
request += "\n\n<translation_of_product_meta_title>"
|
||||
request += ProductDescription.MetaTitle
|
||||
request += "</translation_of_product_meta_title>"
|
||||
request += "\n"
|
||||
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website."
|
||||
request += "\n"
|
||||
request += "<translation_of_product_name>"
|
||||
request += "</translation_of_product_meta_title>\n\n"
|
||||
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
|
||||
request += "\n\n<translation_of_product_name>"
|
||||
request += ProductDescription.Name
|
||||
request += "</translation_of_product_name>"
|
||||
request += "\n"
|
||||
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website."
|
||||
request += "\n"
|
||||
request += "<translation_of_display_text_available_now>"
|
||||
request += "</translation_of_product_name>\n\n"
|
||||
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
|
||||
request += "\n\n<translation_of_display_text_available_now>"
|
||||
request += ProductDescription.AvailableNow
|
||||
request += "</translation_of_display_text_available_now>"
|
||||
request += "\n"
|
||||
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website."
|
||||
request += "\n"
|
||||
request += "<translation_of_display_text_available_later>"
|
||||
request += "</translation_of_display_text_available_now>\n\n"
|
||||
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
|
||||
request += "\n\n<translation_of_display_text_available_later>"
|
||||
request += ProductDescription.AvailableLater
|
||||
request += "</translation_of_display_text_available_later>"
|
||||
request += "\n"
|
||||
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure. You must only translate text visible on website."
|
||||
request += "\n"
|
||||
request += "<translation_of_product_usage>"
|
||||
request += "</translation_of_display_text_available_later>\n\n"
|
||||
request += "Remember: translate to " + lang.ISOCode + " without changing the html structure."
|
||||
request += "\n\n<translation_of_product_usage>"
|
||||
request += ProductDescription.Usage
|
||||
request += "</translation_of_product_usage>"
|
||||
|
||||
request = cleanForPrompt(request)
|
||||
|
||||
openai_response, err := s.client.Responses.New(context.Background(), responses.ResponseNewParams{
|
||||
Input: responses.ResponseNewParamsInputUnion{OfString: openai.String(request)},
|
||||
Model: openai.ChatModelGPT4_1Mini,
|
||||
@@ -194,61 +165,136 @@ func (s *ProductDescriptionService) TranslateProductDescription(userID uint, pro
|
||||
}
|
||||
output := openai_response.OutputText()
|
||||
|
||||
// for debugging purposes
|
||||
// fi, err := os.ReadFile("/home/daniel/coding/work/b2b/app/service/productDescriptionService/test.txt") // just pass the file name
|
||||
// for testing purposes
|
||||
// fi, err := os.ReadFile("/home/daniel/coding/work/b2b/app/service/productDescriptionService/test_out.txt") // just pass the file name
|
||||
// output := string(fi)
|
||||
|
||||
success, match := GetStringInBetween(output, "<translation_of_product_description>", "</translation_of_product_description>")
|
||||
success, resolution := resolveResponse(ProductDescription.Description, output, "translation_of_product_description")
|
||||
if !success {
|
||||
return nil, responseErrors.ErrOpenAIBadOutput
|
||||
}
|
||||
ProductDescription.Description = match
|
||||
ProductDescription.Description = resolution
|
||||
|
||||
success, match = GetStringInBetween(output, "<translation_of_product_short_description>", "</translation_of_product_short_description>")
|
||||
success, resolution = resolveResponse(ProductDescription.DescriptionShort, output, "translation_of_product_short_description")
|
||||
if !success {
|
||||
return nil, responseErrors.ErrOpenAIBadOutput
|
||||
}
|
||||
ProductDescription.DescriptionShort = match
|
||||
ProductDescription.DescriptionShort = resolution
|
||||
|
||||
success, match = GetStringInBetween(output, "<translation_of_product_meta_description>", "</translation_of_product_meta_description>")
|
||||
success, resolution = resolveResponse(ProductDescription.MetaDescription, output, "translation_of_product_meta_description")
|
||||
if !success {
|
||||
return nil, responseErrors.ErrOpenAIBadOutput
|
||||
}
|
||||
ProductDescription.MetaDescription = match
|
||||
ProductDescription.MetaDescription = resolution
|
||||
|
||||
success, match = GetStringInBetween(output, "<translation_of_product_meta_title>", "</translation_of_product_meta_title>")
|
||||
success, resolution = resolveResponse(ProductDescription.MetaTitle, output, "translation_of_product_meta_title")
|
||||
if !success {
|
||||
return nil, responseErrors.ErrOpenAIBadOutput
|
||||
}
|
||||
ProductDescription.MetaTitle = match
|
||||
ProductDescription.MetaTitle = resolution
|
||||
|
||||
success, match = GetStringInBetween(output, "<translation_of_product_name>", "</translation_of_product_name>")
|
||||
success, resolution = resolveResponse(ProductDescription.Name, output, "translation_of_product_name")
|
||||
if !success {
|
||||
return nil, responseErrors.ErrOpenAIBadOutput
|
||||
}
|
||||
ProductDescription.Name = match
|
||||
ProductDescription.Name = resolution
|
||||
|
||||
success, match = GetStringInBetween(output, "<translation_of_display_text_available_now>", "</translation_of_display_text_available_now>")
|
||||
success, resolution = resolveResponse(ProductDescription.AvailableNow, output, "translation_of_display_text_available_now")
|
||||
if !success {
|
||||
return nil, responseErrors.ErrOpenAIBadOutput
|
||||
}
|
||||
ProductDescription.AvailableNow = match
|
||||
ProductDescription.AvailableNow = resolution
|
||||
|
||||
success, match = GetStringInBetween(output, "<translation_of_display_text_available_later>", "</translation_of_display_text_available_later>")
|
||||
success, resolution = resolveResponse(ProductDescription.AvailableLater, output, "translation_of_display_text_available_later")
|
||||
if !success {
|
||||
return nil, responseErrors.ErrOpenAIBadOutput
|
||||
}
|
||||
ProductDescription.AvailableLater = match
|
||||
ProductDescription.AvailableLater = resolution
|
||||
|
||||
success, match = GetStringInBetween(output, "<translation_of_product_usage>", "</translation_of_product_usage>")
|
||||
success, resolution = resolveResponse(ProductDescription.Usage, output, "translation_of_product_usage")
|
||||
if !success {
|
||||
return nil, responseErrors.ErrOpenAIBadOutput
|
||||
}
|
||||
ProductDescription.Usage = match
|
||||
ProductDescription.Usage = resolution
|
||||
|
||||
return &ProductDescription, nil
|
||||
}
|
||||
|
||||
// isValidXHTML reports whether s can be tokenized from start to finish by a
// strict XML decoder that also knows the HTML named entities (e.g. &nbsp;).
//
// The check is purely about well-formedness: every token must parse and the
// input must end cleanly. An empty string contains no tokens and is therefore
// accepted.
func isValidXHTML(s string) bool {
	decoder := xml.NewDecoder(strings.NewReader(s))

	// Strict mode rejects unclosed tags and unknown entities. Per the
	// encoding/xml documentation AutoClose is only consulted when Strict is
	// false, so it has no effect here; Entity adds the HTML entity names.
	decoder.Strict = true
	decoder.AutoClose = xml.HTMLAutoClose
	decoder.Entity = xml.HTMLEntity

	for {
		if _, err := decoder.Token(); err != nil {
			// io.EOF means the whole input was consumed without a syntax
			// error; anything else is a parse failure.
			return err == io.EOF
		}
	}
}
|
||||
|
||||
func cleanForPrompt(s string) string {
|
||||
r := strings.NewReader(s)
|
||||
d := xml.NewDecoder(r)
|
||||
|
||||
prompt := ""
|
||||
|
||||
// Configure the decoder for HTML; leave off strict and autoclose for XHTML
|
||||
d.Strict = true
|
||||
d.AutoClose = xml.HTMLAutoClose
|
||||
d.Entity = xml.HTMLEntity
|
||||
for {
|
||||
token, err := d.Token()
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
|
||||
switch v := token.(type) {
|
||||
case xml.StartElement:
|
||||
prompt += "<" + AttrName(v.Name)
|
||||
|
||||
for _, attr := range v.Attr {
|
||||
if v.Name.Local == "img" && attr.Name.Local == "alt" {
|
||||
prompt += fmt.Sprintf(` %s="%s"`, AttrName(attr.Name), attr.Value)
|
||||
}
|
||||
}
|
||||
|
||||
prompt += ">"
|
||||
case xml.EndElement:
|
||||
prompt += "</" + AttrName(v.Name) + ">"
|
||||
case xml.CharData:
|
||||
prompt += string(v)
|
||||
case xml.Comment:
|
||||
case xml.ProcInst:
|
||||
case xml.Directive:
|
||||
}
|
||||
}
|
||||
|
||||
return prompt
|
||||
}
|
||||
|
||||
func resolveResponse(original string, response string, key string) (bool, string) {
|
||||
success, match := GetStringInBetween(response, "<"+key+">", "</"+key+">")
|
||||
if !success || !isValidXHTML(match) {
|
||||
return false, ""
|
||||
}
|
||||
|
||||
success, resolution := RebuildFromResponse("<"+key+">"+original+"</"+key+">", "<"+key+">"+match+"</"+key+">")
|
||||
if !success {
|
||||
return false, ""
|
||||
}
|
||||
|
||||
return true, resolution[2+len(key) : len(resolution)-3-len(key)]
|
||||
}
|
||||
|
||||
// GetStringInBetween returns empty string if no start or end string found
|
||||
func GetStringInBetween(str string, start string, end string) (success bool, result string) {
|
||||
s := strings.Index(str, start)
|
||||
@@ -263,3 +309,140 @@ func GetStringInBetween(str string, start string, end string) (success bool, res
|
||||
|
||||
return true, str[s : s+e]
|
||||
}
|
||||
|
||||
// Rebuilds HTML using the original HTML as a template and the response as a source
|
||||
// Assumes that both original and response have the exact same XML structure
|
||||
func RebuildFromResponse(s_original string, s_response string) (bool, string) {
|
||||
|
||||
r_original := strings.NewReader(s_original)
|
||||
d_original := xml.NewDecoder(r_original)
|
||||
|
||||
r_response := strings.NewReader(s_response)
|
||||
d_response := xml.NewDecoder(r_response)
|
||||
|
||||
result := ""
|
||||
|
||||
// Configure the decoder for HTML; leave off strict and autoclose for XHTML
|
||||
d_original.Strict = true
|
||||
d_original.AutoClose = xml.HTMLAutoClose
|
||||
d_original.Entity = xml.HTMLEntity
|
||||
|
||||
d_response.Strict = true
|
||||
d_response.AutoClose = xml.HTMLAutoClose
|
||||
d_response.Entity = xml.HTMLEntity
|
||||
|
||||
token_original, err_original := d_original.Token()
|
||||
token_response, err_response := d_response.Token()
|
||||
|
||||
for {
|
||||
// err_original can only be EOF or nil.
|
||||
if err_original != nil || err_response != nil {
|
||||
if err_original != err_response {
|
||||
return false, ""
|
||||
}
|
||||
return true, result
|
||||
}
|
||||
|
||||
switch v_original := token_original.(type) {
|
||||
case xml.StartElement:
|
||||
switch v_response := token_response.(type) {
|
||||
case xml.StartElement:
|
||||
if v_original.Name.Space != v_response.Name.Space || v_original.Name.Local != v_response.Name.Local {
|
||||
return false, ""
|
||||
}
|
||||
|
||||
result += "<" + AttrName(v_original.Name)
|
||||
|
||||
for _, attr := range v_original.Attr {
|
||||
if v_original.Name.Local != "img" || attr.Name.Local != "alt" {
|
||||
result += fmt.Sprintf(` %s="%s"`, AttrName(attr.Name), attr.Value)
|
||||
}
|
||||
}
|
||||
|
||||
for _, attr := range v_response.Attr {
|
||||
if v_response.Name.Local == "img" && attr.Name.Local == "alt" {
|
||||
result += fmt.Sprintf(` %s="%s"`, AttrName(attr.Name), attr.Value)
|
||||
}
|
||||
}
|
||||
result += ">"
|
||||
|
||||
case xml.CharData:
|
||||
result += string(v_response)
|
||||
token_response, err_response = d_response.Token()
|
||||
continue
|
||||
|
||||
default:
|
||||
return false, ""
|
||||
}
|
||||
|
||||
case xml.EndElement:
|
||||
switch v_response := token_response.(type) {
|
||||
case xml.EndElement:
|
||||
if v_original.Name.Space != v_response.Name.Space || v_original.Name.Local != v_response.Name.Local {
|
||||
return false, ""
|
||||
}
|
||||
|
||||
if v_original.Name.Local != "img" {
|
||||
result += "</" + AttrName(v_original.Name) + ">"
|
||||
}
|
||||
|
||||
case xml.CharData:
|
||||
result += string(v_response)
|
||||
token_response, err_response = d_response.Token()
|
||||
continue
|
||||
|
||||
default:
|
||||
return false, ""
|
||||
}
|
||||
|
||||
case xml.CharData:
|
||||
switch v_response := token_response.(type) {
|
||||
case xml.CharData:
|
||||
result += string(v_response)
|
||||
|
||||
case xml.StartElement:
|
||||
result += string(v_original)
|
||||
token_original, err_original = d_original.Token()
|
||||
continue
|
||||
|
||||
case xml.EndElement:
|
||||
result += string(v_original)
|
||||
token_original, err_original = d_original.Token()
|
||||
continue
|
||||
|
||||
default:
|
||||
return false, ""
|
||||
}
|
||||
|
||||
case xml.Comment:
|
||||
result += "<!--" + string(v_original) + "-->"
|
||||
token_original, err_original = d_original.Token()
|
||||
continue
|
||||
|
||||
case xml.ProcInst:
|
||||
if len(v_original.Inst) == 0 {
|
||||
result += "<?" + v_original.Target + "?>"
|
||||
} else {
|
||||
result += "<?" + v_original.Target + " " + string(v_original.Inst) + "?>"
|
||||
}
|
||||
token_original, err_original = d_original.Token()
|
||||
continue
|
||||
|
||||
case xml.Directive:
|
||||
result += "<!" + string(v_original) + ">"
|
||||
token_original, err_original = d_original.Token()
|
||||
continue
|
||||
}
|
||||
|
||||
token_original, err_original = d_original.Token()
|
||||
token_response, err_response = d_response.Token()
|
||||
}
|
||||
}
|
||||
|
||||
// AttrName formats an element or attribute name for output: the local name on
// its own when Space is empty, otherwise "Space:Local".
//
// NOTE(review): xml.Decoder.Token replaces declared namespace prefixes with
// their namespace URL, so for such names Space would be a URL rather than the
// prefix that appeared in the markup — confirm inputs do not rely on declared
// namespaces.
func AttrName(name xml.Name) string {
	if name.Space != "" {
		return name.Space + ":" + name.Local
	}
	return name.Local
}
|
||||
|
||||
File diff suppressed because one or more lines are too long
0
app/service/productDescriptionService/test_in.txt
Normal file
0
app/service/productDescriptionService/test_in.txt
Normal file
@@ -40,7 +40,7 @@ var (
|
||||
// Typed errors for product description handler
|
||||
ErrBadAttribute = errors.New("bad attribute")
|
||||
ErrBadField = errors.New("this field can not be updated")
|
||||
ErrInvalidHTML = errors.New("text is not in html format")
|
||||
ErrInvalidXHTML = errors.New("text is not in xhtml format")
|
||||
ErrOpenAIResponseFail = errors.New("OpenAI responded with failure")
|
||||
ErrOpenAIBadOutput = errors.New("OpenAI response does not obey the format")
|
||||
)
|
||||
@@ -116,7 +116,7 @@ func GetErrorCode(c fiber.Ctx, err error) string {
|
||||
return i18n.T_(c, "error.err_bad_attribute")
|
||||
case errors.Is(err, ErrBadField):
|
||||
return i18n.T_(c, "error.err_bad_field")
|
||||
case errors.Is(err, ErrInvalidHTML):
|
||||
case errors.Is(err, ErrInvalidXHTML):
|
||||
return i18n.T_(c, "error.err_invalid_html")
|
||||
case errors.Is(err, ErrOpenAIResponseFail):
|
||||
return i18n.T_(c, "error.err_openai_response_fail")
|
||||
@@ -154,7 +154,7 @@ func GetErrorStatus(err error) int {
|
||||
errors.Is(err, ErrInvalidPassword),
|
||||
errors.Is(err, ErrBadAttribute),
|
||||
errors.Is(err, ErrBadField),
|
||||
errors.Is(err, ErrInvalidHTML):
|
||||
errors.Is(err, ErrInvalidXHTML):
|
||||
return fiber.StatusBadRequest
|
||||
case errors.Is(err, ErrEmailExists):
|
||||
return fiber.StatusConflict
|
||||
|
||||
Reference in New Issue
Block a user