mirror of
https://github.com/JuLi0n21/thumbnailservice.git
synced 2026-04-20 00:10:07 +00:00
fix ocr
This commit is contained in:
@@ -29,21 +29,29 @@ func main() {
|
||||
}
|
||||
client := pb.NewThumbnailServiceClient(conn)
|
||||
filePath := []thingy{
|
||||
{pb.FileType_IMAGE, "testdata/image-sample.png"},
|
||||
//{pb.FileType_IMAGE, "testdata/image-sample.png"},
|
||||
{pb.FileType_PDF, "testdata/pdf-sample.pdf"},
|
||||
{pb.FileType_VIDEO, "testdata/video-sample.webm"}}
|
||||
|
||||
a := sync.WaitGroup{}
|
||||
|
||||
for _, f := range filePath {
|
||||
a.Add(1)
|
||||
go func() {
|
||||
createPreview(f.Path, f.Type, client)
|
||||
a.Done()
|
||||
}()
|
||||
{pb.FileType_PDF, "testdata/blitzer.pdf"},
|
||||
//{pb.FileType_VIDEO, "testdata/video-sample.webm"}
|
||||
}
|
||||
|
||||
a.Wait()
|
||||
wg := sync.WaitGroup{}
|
||||
|
||||
for _, f := range filePath {
|
||||
wg.Add(2)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
createPreview(f.Path, f.Type, client)
|
||||
}()
|
||||
|
||||
go func(f thingy) {
|
||||
defer wg.Done()
|
||||
createOCR(f.Path, f.Type, client)
|
||||
}(f)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
||||
}
|
||||
|
||||
func createPreview(filePath string, ftype pb.FileType, client pb.ThumbnailServiceClient) {
|
||||
@@ -78,6 +86,40 @@ func createPreview(filePath string, ftype pb.FileType, client pb.ThumbnailServic
|
||||
}
|
||||
}
|
||||
|
||||
func createOCR(filePath string, ftype pb.FileType, client pb.ThumbnailServiceClient) {
|
||||
fileContent, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
log.Printf("Error reading file: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
req := &pb.OCRFileRequest{
|
||||
FileContent: fileContent,
|
||||
FileType: ftype,
|
||||
CleanUp: true,
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60000*time.Second)
|
||||
defer cancel()
|
||||
|
||||
resp, err := client.OcrFile(ctx, req)
|
||||
if err != nil {
|
||||
log.Printf("Error calling OcrDocument: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Printf("[OCR] %s: %s\n %s", filePath, resp.Message, resp.TextContent)
|
||||
|
||||
if len(resp.OcrContent) > 0 {
|
||||
err := saveToFile([]byte(resp.OcrContent), filePath, "ocr", ".pdf")
|
||||
if err != nil {
|
||||
log.Printf("Error saving OCR text to file: %v", err)
|
||||
} else {
|
||||
fmt.Println("OCR text saved successfully.")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Function to save the thumbnail content to a file in the 'thumbnail/' directory
|
||||
func saveThumbnailToFile(thumbnailContent []byte, filePath string) error {
|
||||
// Ensure the "thumbnail" directory exists
|
||||
@@ -97,3 +139,21 @@ func saveThumbnailToFile(thumbnailContent []byte, filePath string) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func saveToFile(data []byte, originalPath, folder, ext string) error {
|
||||
err := os.MkdirAll(folder, os.ModePerm)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create directory: %v", err)
|
||||
}
|
||||
|
||||
baseName := filepath.Base(originalPath)
|
||||
fileName := strings.TrimSuffix(baseName, filepath.Ext(baseName))
|
||||
|
||||
fullPath := filepath.Join(folder, fileName+ext)
|
||||
err = os.WriteFile(fullPath, data, 0644)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to save file: %v", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user