some debugging
This commit is contained in:
@@ -0,0 +1,447 @@
|
||||
package productTranslationService
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"os"
|
||||
"slices"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"cloud.google.com/go/auth/credentials"
|
||||
translate "cloud.google.com/go/translate/apiv3"
|
||||
"cloud.google.com/go/translate/apiv3/translatepb"
|
||||
"git.ma-al.com/goc_daniel/b2b/app/config"
|
||||
"git.ma-al.com/goc_daniel/b2b/app/model"
|
||||
"git.ma-al.com/goc_daniel/b2b/app/repos/productDescriptionRepo"
|
||||
"git.ma-al.com/goc_daniel/b2b/app/service/langsService"
|
||||
"git.ma-al.com/goc_daniel/b2b/app/utils/responseErrors"
|
||||
"github.com/openai/openai-go/v3"
|
||||
"github.com/openai/openai-go/v3/option"
|
||||
"github.com/openai/openai-go/v3/responses"
|
||||
googleopt "google.golang.org/api/option"
|
||||
)
|
||||
|
||||
// ProductTranslationService reads, saves and machine-translates product
// descriptions. It bundles the description repository with the two translation
// back ends used by TranslateProductDescription (Google Cloud Translation and
// OpenAI).
type ProductTranslationService struct {
	// productDescriptionRepo is the storage layer for product descriptions.
	productDescriptionRepo productDescriptionRepo.UIProductDescriptionRepo
	// ctx is the context passed to Google Translation calls; New sets it to
	// context.Background().
	ctx context.Context
	// googleCli is the Cloud Translation v3 client, stored by value (New copies
	// the client it creates).
	googleCli translate.TranslationClient
	// projectID is the Google Cloud project used to build the request parent path.
	projectID string
	// openAIClient is the OpenAI API client used for the "OpenAI" translation path.
	openAIClient openai.Client
}
|
||||
|
||||
// New creates a ProductTranslationService and authenticates against the
|
||||
// Google Cloud Translation API using a service account key file.
|
||||
//
|
||||
// Required configuration (set in .env or environment):
|
||||
//
|
||||
// GOOGLE_APPLICATION_CREDENTIALS – absolute path to the service account JSON key file
|
||||
// GOOGLE_CLOUD_PROJECT_ID – your Google Cloud project ID
|
||||
//
|
||||
// The service account must have the "Cloud Translation API User" role
|
||||
// (roles/cloudtranslate.user) granted in Google Cloud IAM.
|
||||
func New() *ProductTranslationService {
|
||||
ctx := context.Background()
|
||||
cfg := config.Get()
|
||||
|
||||
// Read the service account key file whose path comes from config / env.
|
||||
data, err := os.ReadFile(cfg.GoogleTranslate.CredentialsFile)
|
||||
if err != nil {
|
||||
log.Fatalf("ProductTranslationService: cannot read credentials file %q: %v",
|
||||
cfg.GoogleTranslate.CredentialsFile, err)
|
||||
}
|
||||
|
||||
// Build OAuth2 credentials scoped to the Cloud Translation API.
|
||||
// The correct scope for Cloud Translation v3 is "cloud-translation".
|
||||
creds, err := credentials.DetectDefault(&credentials.DetectOptions{
|
||||
Scopes: []string{"https://www.googleapis.com/auth/cloud-translation"},
|
||||
CredentialsJSON: data,
|
||||
})
|
||||
if err != nil {
|
||||
log.Fatalf("ProductTranslationService: cannot build Google credentials: %v", err)
|
||||
}
|
||||
|
||||
googleCli, err := translate.NewTranslationClient(ctx, googleopt.WithAuthCredentials(creds))
|
||||
if err != nil {
|
||||
log.Fatalf("ProductTranslationService: cannot create Translation client: %v", err)
|
||||
}
|
||||
|
||||
openAIClient := openai.NewClient(option.WithAPIKey(os.Getenv("OPENAI_KEY")),
|
||||
option.WithHTTPClient(&http.Client{Timeout: 300 * time.Second})) // five minutes timeout
|
||||
|
||||
return &ProductTranslationService{
|
||||
productDescriptionRepo: productDescriptionRepo.New(),
|
||||
ctx: ctx,
|
||||
openAIClient: openAIClient,
|
||||
googleCli: *googleCli,
|
||||
projectID: cfg.GoogleTranslate.ProjectID,
|
||||
}
|
||||
}
|
||||
|
||||
func (s *ProductTranslationService) GetProductDescription(userID uint, productID uint, productLangID uint) (*model.ProductDescription, error) {
|
||||
return s.productDescriptionRepo.GetProductDescription(productID, productLangID)
|
||||
}
|
||||
|
||||
// Updates relevant fields with the "updates" map
|
||||
func (s *ProductTranslationService) SaveProductDescription(userID uint, productID uint, productLangID uint, updates map[string]string) error {
|
||||
// only some fields can be affected
|
||||
allowedFields := []string{"description", "description_short", "meta_description", "meta_title", "name", "available_now", "available_later", "usage"}
|
||||
for key := range updates {
|
||||
if !slices.Contains(allowedFields, key) {
|
||||
return responseErrors.ErrBadField
|
||||
}
|
||||
}
|
||||
|
||||
// check that fields description, description_short and usage, if they exist, have a valid html format
|
||||
mustBeHTML := []string{"description", "description_short", "usage"}
|
||||
for i := 0; i < len(mustBeHTML); i++ {
|
||||
if text, exists := updates[mustBeHTML[i]]; exists {
|
||||
if !isValidXHTML(text) {
|
||||
return responseErrors.ErrInvalidXHTML
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
err := s.productDescriptionRepo.CreateIfDoesNotExist(productID, productLangID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return s.productDescriptionRepo.UpdateFields(productID, productLangID, updates)
|
||||
}
|
||||
|
||||
// TranslateProductDescription fetches the product description for productFromLangID,
|
||||
// translates every text field into productToLangID using the Google Cloud
|
||||
// Translation API (v3 TranslateText), and returns the translated record.
|
||||
//
|
||||
// The Google Cloud project must have the Cloud Translation API enabled and the
|
||||
// service account must hold the "Cloud Translation API User" role.
|
||||
func (s *ProductTranslationService) TranslateProductDescription(userID uint, productID uint, productFromLangID uint, productToLangID uint, aiModel string) (*model.ProductDescription, error) {
|
||||
|
||||
productDescription, err := s.productDescriptionRepo.GetProductDescription(productID, productFromLangID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
productDescription.LangID = productToLangID
|
||||
|
||||
// we translate all changeable fields, and we keep the exact same HTML structure in relevant fields.
|
||||
lang, err := langsService.LangSrv.GetLanguageById(productToLangID)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fields := []*string{&productDescription.Description,
|
||||
&productDescription.DescriptionShort,
|
||||
&productDescription.MetaDescription,
|
||||
&productDescription.MetaTitle,
|
||||
&productDescription.Name,
|
||||
&productDescription.AvailableNow,
|
||||
&productDescription.AvailableLater,
|
||||
&productDescription.Usage,
|
||||
}
|
||||
keys := []string{"translation_of_product_description",
|
||||
"translation_of_product_short_description",
|
||||
"translation_of_product_meta_description",
|
||||
"translation_of_product_meta_title",
|
||||
"translation_of_product_name",
|
||||
"translation_of_product_available_now",
|
||||
"translation_of_product_available_later",
|
||||
"translation_of_product_usage",
|
||||
}
|
||||
|
||||
request := ""
|
||||
if aiModel == "OpenAI" {
|
||||
request = "Translate to " + lang.ISOCode + " without changing the html structure.\n"
|
||||
}
|
||||
for i := 0; i < len(keys); i++ {
|
||||
request += "\n<" + keys[i] + ">"
|
||||
request += *fields[i]
|
||||
request += "</" + keys[i] + ">\n"
|
||||
}
|
||||
if aiModel == "OpenAI" {
|
||||
request = cleanForPrompt(request)
|
||||
}
|
||||
|
||||
if aiModel == "OpenAI" {
|
||||
response, _ := s.openAIClient.Responses.New(context.Background(), responses.ResponseNewParams{
|
||||
Input: responses.ResponseNewParamsInputUnion{OfString: openai.String(request)},
|
||||
Model: openai.ChatModelGPT4_1Mini,
|
||||
// Model: openai.ChatModelGPT4_1Nano,
|
||||
})
|
||||
if response.Status != "completed" {
|
||||
return nil, responseErrors.ErrAIResponseFail
|
||||
}
|
||||
|
||||
for i := 0; i < len(keys); i++ {
|
||||
success, resolution := resolveResponse(*fields[i], response.OutputText(), keys[i])
|
||||
if !success {
|
||||
return nil, responseErrors.ErrAIBadOutput
|
||||
}
|
||||
*fields[i] = resolution
|
||||
|
||||
// fmt.Println(resolution)
|
||||
}
|
||||
|
||||
} else if aiModel == "Google" {
|
||||
// TranslateText is the standard Cloud Translation v3 endpoint.
|
||||
req := &translatepb.TranslateTextRequest{
|
||||
Parent: fmt.Sprintf("projects/%s/locations/global", s.projectID),
|
||||
TargetLanguageCode: lang.ISOCode,
|
||||
MimeType: "text/html",
|
||||
Contents: []string{request},
|
||||
}
|
||||
responseGoogle, err := s.googleCli.TranslateText(s.ctx, req)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// TranslateText returns one Translation per input string.
|
||||
if len(responseGoogle.GetTranslations()) == 0 {
|
||||
return nil, responseErrors.ErrAIBadOutput
|
||||
}
|
||||
response := responseGoogle.GetTranslations()[0].GetTranslatedText()
|
||||
|
||||
for i := 0; i < len(keys); i++ {
|
||||
success, match := getStringInBetween(response, "<"+keys[i]+">", "</"+keys[i]+">")
|
||||
if !success || !isValidXHTML(match) {
|
||||
return nil, responseErrors.ErrAIBadOutput
|
||||
}
|
||||
*fields[i] = match
|
||||
|
||||
// fmt.Println(match)
|
||||
}
|
||||
}
|
||||
|
||||
return productDescription, nil
|
||||
}
|
||||
|
||||
// promptTextEscaper and promptAttrEscaper re-encode the characters that the XML
// decoder has already unescaped, so the generated prompt is well-formed markup.
var (
	promptTextEscaper = strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;")
	promptAttrEscaper = strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;", `"`, "&quot;")
)

// cleanForPrompt returns a reduced copy of the markup in s for use as a model
// prompt:
//
//   - every attribute is dropped except the alt attribute of <img> elements;
//   - comments, processing instructions and directives are dropped;
//   - element names and text are kept, with text and alt values re-escaped
//     (&, <, > and, in attributes, ");
//   - an empty element such as <br/> is written as <br></br>.
//
// s is parsed strictly (HTML named entities are recognised). If s is not
// well-formed, decoding stops at the first error and the part produced up to
// that point is returned; the function never blocks on malformed input.
func cleanForPrompt(s string) string {
	d := xml.NewDecoder(strings.NewReader(s))
	d.Strict = true
	// AutoClose is only consulted when Strict is false, so it is inert here; it
	// is kept so the decoder setup matches the other helpers in this file.
	d.AutoClose = xml.HTMLAutoClose
	d.Entity = xml.HTMLEntity

	var prompt strings.Builder
	prompt.Grow(len(s))

	// writeName emits "space:local", or just "local" when there is no space.
	writeName := func(n xml.Name) {
		if n.Space != "" {
			prompt.WriteString(n.Space)
			prompt.WriteByte(':')
		}
		prompt.WriteString(n.Local)
	}

	for {
		token, err := d.Token()
		if err != nil {
			// io.EOF ends a well-formed document. Any other error is sticky in
			// the decoder (every later call fails the same way), so stopping
			// here is the only way to terminate.
			break
		}

		switch v := token.(type) {
		case xml.StartElement:
			prompt.WriteByte('<')
			writeName(v.Name)
			if v.Name.Local == "img" {
				for _, attr := range v.Attr {
					if attr.Name.Local != "alt" {
						continue
					}
					prompt.WriteByte(' ')
					writeName(attr.Name)
					prompt.WriteString(`="`)
					prompt.WriteString(promptAttrEscaper.Replace(attr.Value))
					prompt.WriteByte('"')
				}
			}
			prompt.WriteByte('>')
		case xml.EndElement:
			prompt.WriteString("</")
			writeName(v.Name)
			prompt.WriteByte('>')
		case xml.CharData:
			prompt.WriteString(promptTextEscaper.Replace(string(v)))
		}
	}

	return prompt.String()
}
|
||||
|
||||
func resolveResponse(original string, response string, key string) (bool, string) {
|
||||
success, match := getStringInBetween(response, "<"+key+">", "</"+key+">")
|
||||
if !success || !isValidXHTML(match) {
|
||||
return false, ""
|
||||
}
|
||||
|
||||
success, resolution := rebuildFromResponse("<"+key+">"+original+"</"+key+">", "<"+key+">"+match+"</"+key+">")
|
||||
if !success {
|
||||
return false, ""
|
||||
}
|
||||
|
||||
return true, resolution[2+len(key) : len(resolution)-3-len(key)]
|
||||
}
|
||||
|
||||
// getStringInBetween extracts the text that follows the first occurrence of
// start and precedes the next occurrence of end after it.
// It reports (false, "") when either marker is missing.
func getStringInBetween(str string, start string, end string) (success bool, result string) {
	_, rest, hasStart := strings.Cut(str, start)
	if !hasStart {
		return false, ""
	}

	inner, _, hasEnd := strings.Cut(rest, end)
	if !hasEnd {
		return false, ""
	}

	return true, inner
}
|
||||
|
||||
// isValidXHTML reports whether s can be tokenised from start to end by a strict
// XML decoder that also knows the HTML named entities. Unclosed tags such as
// <br> therefore make the input invalid, while <br/> is accepted.
func isValidXHTML(s string) bool {
	dec := xml.NewDecoder(strings.NewReader(s))
	dec.Strict = true
	// AutoClose is only consulted when Strict is false, so it is inert here.
	dec.AutoClose = xml.HTMLAutoClose
	dec.Entity = xml.HTMLEntity

	for {
		if _, err := dec.Token(); err != nil {
			// Reaching the end of input means every token was fine; any other
			// error is a syntax problem.
			return err == io.EOF
		}
	}
}
|
||||
|
||||
// rebuildTextEscaper and rebuildAttrEscaper re-encode the characters that the
// XML decoder has already unescaped, so decoded text such as "<script>" (from
// "&lt;script&gt;") can never turn into live markup in the rebuilt document.
var (
	rebuildTextEscaper = strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;")
	rebuildAttrEscaper = strings.NewReplacer("&", "&amp;", "<", "&lt;", ">", "&gt;", `"`, "&quot;")
)

// rebuildIsVoidElement reports whether name is an HTML void element, which is
// written self-closed ("<br />") and has no end tag.
func rebuildIsVoidElement(name string) bool {
	switch name {
	case "area", "base", "br", "col", "embed", "hr", "img", "input",
		"link", "meta", "param", "source", "track", "wbr":
		return true
	}
	return false
}

// rebuildFromResponse merges a translated document into the original one:
// element names, attributes, comments, processing instructions and directives
// come from s_original, while text and the alt attribute of <img> elements
// come from s_response.
//
// Both inputs are tokenised in lock-step with a strict decoder:
//
//   - start/end tags must appear in the same order with the same names,
//     otherwise the merge fails;
//   - text in the response where the original has a tag is inserted as is
//     (extra translated text);
//   - text in the original where the response has a tag is kept from the
//     original (text the response dropped);
//   - comments, processing instructions and directives of the original are
//     copied through; the same constructs in the response make the merge fail.
//
// Text and attribute values are re-escaped on output, and void elements
// (<img>, <br>, ...) are written self-closed so the result is well-formed.
//
// It returns (true, merged) when both inputs end together, and (false, "")
// on a structure mismatch or when either input is not well-formed.
func rebuildFromResponse(s_original string, s_response string) (bool, string) {
	newDecoder := func(src string) *xml.Decoder {
		d := xml.NewDecoder(strings.NewReader(src))
		d.Strict = true
		// AutoClose is only consulted when Strict is false, so it is inert here.
		d.AutoClose = xml.HTMLAutoClose
		d.Entity = xml.HTMLEntity
		return d
	}
	origDec := newDecoder(s_original)
	respDec := newDecoder(s_response)

	var out strings.Builder
	out.Grow(len(s_original))

	// writeName emits "space:local", or just "local" when there is no space.
	writeName := func(n xml.Name) {
		if n.Space != "" {
			out.WriteString(n.Space)
			out.WriteByte(':')
		}
		out.WriteString(n.Local)
	}
	writeAttr := func(a xml.Attr) {
		out.WriteByte(' ')
		writeName(a.Name)
		out.WriteString(`="`)
		out.WriteString(rebuildAttrEscaper.Replace(a.Value))
		out.WriteByte('"')
	}
	writeText := func(text xml.CharData) {
		out.WriteString(rebuildTextEscaper.Replace(string(text)))
	}

	origTok, origErr := origDec.Token()
	respTok, respErr := respDec.Token()

	for {
		if origErr != nil || respErr != nil {
			// The merge succeeds only when both documents end at the same
			// time; a syntax error or a one-sided end is a failure.
			if origErr == io.EOF && respErr == io.EOF {
				return true, out.String()
			}
			return false, ""
		}

		switch orig := origTok.(type) {
		case xml.StartElement:
			switch resp := respTok.(type) {
			case xml.StartElement:
				if orig.Name != resp.Name {
					return false, ""
				}
				isImg := orig.Name.Local == "img"

				out.WriteByte('<')
				writeName(orig.Name)
				// Original attributes first; an <img> alt is replaced by the
				// translated one from the response.
				for _, attr := range orig.Attr {
					if isImg && attr.Name.Local == "alt" {
						continue
					}
					writeAttr(attr)
				}
				if isImg {
					for _, attr := range resp.Attr {
						if attr.Name.Local == "alt" {
							writeAttr(attr)
						}
					}
				}
				if rebuildIsVoidElement(orig.Name.Local) {
					out.WriteString(" />")
				} else {
					out.WriteByte('>')
				}

			case xml.CharData:
				// Extra text in the response: take it and retry this tag.
				writeText(resp)
				respTok, respErr = respDec.Token()
				continue

			default:
				return false, ""
			}

		case xml.EndElement:
			switch resp := respTok.(type) {
			case xml.EndElement:
				if orig.Name != resp.Name {
					return false, ""
				}
				// Void elements were already closed by their start tag.
				if !rebuildIsVoidElement(orig.Name.Local) {
					out.WriteString("</")
					writeName(orig.Name)
					out.WriteByte('>')
				}

			case xml.CharData:
				// Extra text in the response: take it and retry this tag.
				writeText(resp)
				respTok, respErr = respDec.Token()
				continue

			default:
				return false, ""
			}

		case xml.CharData:
			switch resp := respTok.(type) {
			case xml.CharData:
				writeText(resp)

			case xml.StartElement, xml.EndElement:
				// The response has no text here: keep the original text and
				// compare the next original token with the same response tag.
				writeText(orig)
				origTok, origErr = origDec.Token()
				continue

			default:
				return false, ""
			}

		case xml.Comment:
			out.WriteString("<!--")
			out.Write(orig)
			out.WriteString("-->")
			origTok, origErr = origDec.Token()
			continue

		case xml.ProcInst:
			out.WriteString("<?")
			out.WriteString(orig.Target)
			if len(orig.Inst) != 0 {
				out.WriteByte(' ')
				out.Write(orig.Inst)
			}
			out.WriteString("?>")
			origTok, origErr = origDec.Token()
			continue

		case xml.Directive:
			out.WriteString("<!")
			out.Write(orig)
			out.WriteByte('>')
			origTok, origErr = origDec.Token()
			continue
		}

		// Matching pair consumed: advance both documents.
		origTok, origErr = origDec.Token()
		respTok, respErr = respDec.Token()
	}
}
|
||||
|
||||
func attrName(name xml.Name) string {
|
||||
if name.Space == "" {
|
||||
return name.Local
|
||||
} else {
|
||||
return name.Space + ":" + name.Local
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user