mirror of
https://github.com/JuLi0n21/thumbnailservice.git
synced 2026-04-19 16:00:07 +00:00
fix ocr
This commit is contained in:
@@ -29,21 +29,29 @@ func main() {
|
||||
}
|
||||
client := pb.NewThumbnailServiceClient(conn)
|
||||
filePath := []thingy{
|
||||
{pb.FileType_IMAGE, "testdata/image-sample.png"},
|
||||
//{pb.FileType_IMAGE, "testdata/image-sample.png"},
|
||||
{pb.FileType_PDF, "testdata/pdf-sample.pdf"},
|
||||
{pb.FileType_VIDEO, "testdata/video-sample.webm"}}
|
||||
|
||||
a := sync.WaitGroup{}
|
||||
|
||||
for _, f := range filePath {
|
||||
a.Add(1)
|
||||
go func() {
|
||||
createPreview(f.Path, f.Type, client)
|
||||
a.Done()
|
||||
}()
|
||||
{pb.FileType_PDF, "testdata/blitzer.pdf"},
|
||||
//{pb.FileType_VIDEO, "testdata/video-sample.webm"}
|
||||
}
|
||||
|
||||
a.Wait()
|
||||
wg := sync.WaitGroup{}
|
||||
|
||||
for _, f := range filePath {
|
||||
wg.Add(2)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
createPreview(f.Path, f.Type, client)
|
||||
}()
|
||||
|
||||
go func(f thingy) {
|
||||
defer wg.Done()
|
||||
createOCR(f.Path, f.Type, client)
|
||||
}(f)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
||||
}
|
||||
|
||||
func createPreview(filePath string, ftype pb.FileType, client pb.ThumbnailServiceClient) {
|
||||
@@ -78,6 +86,40 @@ func createPreview(filePath string, ftype pb.FileType, client pb.ThumbnailServic
|
||||
}
|
||||
}
|
||||
|
||||
func createOCR(filePath string, ftype pb.FileType, client pb.ThumbnailServiceClient) {
|
||||
fileContent, err := os.ReadFile(filePath)
|
||||
if err != nil {
|
||||
log.Printf("Error reading file: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
req := &pb.OCRFileRequest{
|
||||
FileContent: fileContent,
|
||||
FileType: ftype,
|
||||
CleanUp: true,
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 60000*time.Second)
|
||||
defer cancel()
|
||||
|
||||
resp, err := client.OcrFile(ctx, req)
|
||||
if err != nil {
|
||||
log.Printf("Error calling OcrDocument: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
fmt.Printf("[OCR] %s: %s\n %s", filePath, resp.Message, resp.TextContent)
|
||||
|
||||
if len(resp.OcrContent) > 0 {
|
||||
err := saveToFile([]byte(resp.OcrContent), filePath, "ocr", ".pdf")
|
||||
if err != nil {
|
||||
log.Printf("Error saving OCR text to file: %v", err)
|
||||
} else {
|
||||
fmt.Println("OCR text saved successfully.")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Function to save the thumbnail content to a file in the 'thumbnail/' directory
|
||||
func saveThumbnailToFile(thumbnailContent []byte, filePath string) error {
|
||||
// Ensure the "thumbnail" directory exists
|
||||
@@ -97,3 +139,21 @@ func saveThumbnailToFile(thumbnailContent []byte, filePath string) error {
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// saveToFile writes data to a file inside folder whose name is the base name
// of originalPath with its final extension replaced by ext.
//
// For example, saveToFile(b, "testdata/scan.png", "ocr", ".pdf") writes
// "ocr/scan.pdf". folder (and any missing parents) is created first, and an
// existing file at the destination is overwritten.
//
// The returned error wraps the underlying os error with %w, so callers can
// inspect the cause with errors.Is / errors.As.
func saveToFile(data []byte, originalPath, folder, ext string) error {
	if err := os.MkdirAll(folder, os.ModePerm); err != nil {
		return fmt.Errorf("failed to create directory: %w", err)
	}

	// Only the last extension is stripped: "a.tar.gz" becomes "a.tar".
	baseName := filepath.Base(originalPath)
	fileName := strings.TrimSuffix(baseName, filepath.Ext(baseName))
	fullPath := filepath.Join(folder, fileName+ext)

	if err := os.WriteFile(fullPath, data, 0644); err != nil {
		return fmt.Errorf("failed to save file: %w", err)
	}
	return nil
}
|
||||
|
||||
@@ -345,9 +345,10 @@ const file_thumbnail_proto_rawDesc = "" +
|
||||
"\x15FILE_TYPE_UNSPECIFIED\x10\x00\x12\t\n" +
|
||||
"\x05IMAGE\x10\x01\x12\t\n" +
|
||||
"\x05VIDEO\x10\x02\x12\a\n" +
|
||||
"\x03PDF\x10\x032r\n" +
|
||||
"\x03PDF\x10\x032\xc4\x01\n" +
|
||||
"\x10ThumbnailService\x12^\n" +
|
||||
"\x11GenerateThumbnail\x12#.thumbnail_service.ThumbnailRequest\x1a$.thumbnail_service.ThumbnailResponseB\tZ\a./protob\x06proto3"
|
||||
"\x11GenerateThumbnail\x12#.thumbnail_service.ThumbnailRequest\x1a$.thumbnail_service.ThumbnailResponse\x12P\n" +
|
||||
"\aOcrFile\x12!.thumbnail_service.OCRFileRequest\x1a\".thumbnail_service.OCRFileResponseB\tZ\a./protob\x06proto3"
|
||||
|
||||
var (
|
||||
file_thumbnail_proto_rawDescOnce sync.Once
|
||||
@@ -374,9 +375,11 @@ var file_thumbnail_proto_depIdxs = []int32{
|
||||
0, // 0: thumbnail_service.ThumbnailRequest.file_type:type_name -> thumbnail_service.FileType
|
||||
0, // 1: thumbnail_service.OCRFileRequest.file_type:type_name -> thumbnail_service.FileType
|
||||
1, // 2: thumbnail_service.ThumbnailService.GenerateThumbnail:input_type -> thumbnail_service.ThumbnailRequest
|
||||
2, // 3: thumbnail_service.ThumbnailService.GenerateThumbnail:output_type -> thumbnail_service.ThumbnailResponse
|
||||
3, // [3:4] is the sub-list for method output_type
|
||||
2, // [2:3] is the sub-list for method input_type
|
||||
3, // 3: thumbnail_service.ThumbnailService.OcrFile:input_type -> thumbnail_service.OCRFileRequest
|
||||
2, // 4: thumbnail_service.ThumbnailService.GenerateThumbnail:output_type -> thumbnail_service.ThumbnailResponse
|
||||
4, // 5: thumbnail_service.ThumbnailService.OcrFile:output_type -> thumbnail_service.OCRFileResponse
|
||||
4, // [4:6] is the sub-list for method output_type
|
||||
2, // [2:4] is the sub-list for method input_type
|
||||
2, // [2:2] is the sub-list for extension type_name
|
||||
2, // [2:2] is the sub-list for extension extendee
|
||||
0, // [0:2] is the sub-list for field type_name
|
||||
|
||||
@@ -20,6 +20,7 @@ const _ = grpc.SupportPackageIsVersion9
|
||||
|
||||
// Full gRPC method names of the ThumbnailService RPCs, in the form
// "/<package>.<service>/<method>". Within this file they are passed to the
// client connection's Invoke call and set as grpc.UnaryServerInfo.FullMethod
// in the server handlers.
const (
|
||||
// Full method name of the GenerateThumbnail RPC.
ThumbnailService_GenerateThumbnail_FullMethodName = "/thumbnail_service.ThumbnailService/GenerateThumbnail"
|
||||
// Full method name of the OcrFile RPC.
ThumbnailService_OcrFile_FullMethodName = "/thumbnail_service.ThumbnailService/OcrFile"
|
||||
)
|
||||
|
||||
// ThumbnailServiceClient is the client API for ThumbnailService service.
|
||||
@@ -29,6 +30,7 @@ const (
|
||||
// Service definition
|
||||
// ThumbnailServiceClient declares the client-side calls of ThumbnailService.
// Both methods take a context, a request message and optional
// grpc.CallOption values, and return the response message or an error.
type ThumbnailServiceClient interface {
|
||||
// GenerateThumbnail sends a ThumbnailRequest and returns the ThumbnailResponse.
GenerateThumbnail(ctx context.Context, in *ThumbnailRequest, opts ...grpc.CallOption) (*ThumbnailResponse, error)
|
||||
// OcrFile sends an OCRFileRequest and returns the OCRFileResponse.
OcrFile(ctx context.Context, in *OCRFileRequest, opts ...grpc.CallOption) (*OCRFileResponse, error)
|
||||
}
|
||||
|
||||
type thumbnailServiceClient struct {
|
||||
@@ -49,6 +51,16 @@ func (c *thumbnailServiceClient) GenerateThumbnail(ctx context.Context, in *Thum
|
||||
return out, nil
|
||||
}
|
||||
|
||||
func (c *thumbnailServiceClient) OcrFile(ctx context.Context, in *OCRFileRequest, opts ...grpc.CallOption) (*OCRFileResponse, error) {
|
||||
cOpts := append([]grpc.CallOption{grpc.StaticMethod()}, opts...)
|
||||
out := new(OCRFileResponse)
|
||||
err := c.cc.Invoke(ctx, ThumbnailService_OcrFile_FullMethodName, in, out, cOpts...)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// ThumbnailServiceServer is the server API for ThumbnailService service.
|
||||
// All implementations must embed UnimplementedThumbnailServiceServer
|
||||
// for forward compatibility.
|
||||
@@ -56,6 +68,7 @@ func (c *thumbnailServiceClient) GenerateThumbnail(ctx context.Context, in *Thum
|
||||
// Service definition
|
||||
// ThumbnailServiceServer declares the server-side methods of ThumbnailService.
// Each RPC method receives the call context and the decoded request message
// and returns the response message or an error.
type ThumbnailServiceServer interface {
|
||||
// GenerateThumbnail handles a ThumbnailRequest and returns a ThumbnailResponse.
GenerateThumbnail(context.Context, *ThumbnailRequest) (*ThumbnailResponse, error)
|
||||
// OcrFile handles an OCRFileRequest and returns an OCRFileResponse.
OcrFile(context.Context, *OCRFileRequest) (*OCRFileResponse, error)
|
||||
// Unexported marker method; UnimplementedThumbnailServiceServer provides it,
// so implementations satisfy the interface by embedding that type.
mustEmbedUnimplementedThumbnailServiceServer()
|
||||
}
|
||||
|
||||
@@ -69,6 +82,9 @@ type UnimplementedThumbnailServiceServer struct{}
|
||||
// GenerateThumbnail is the default stub: it returns a nil response and a gRPC
// status error with code Unimplemented.
func (UnimplementedThumbnailServiceServer) GenerateThumbnail(context.Context, *ThumbnailRequest) (*ThumbnailResponse, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method GenerateThumbnail not implemented")
|
||||
}
|
||||
// OcrFile is the default stub: it returns a nil response and a gRPC status
// error with code Unimplemented.
func (UnimplementedThumbnailServiceServer) OcrFile(context.Context, *OCRFileRequest) (*OCRFileResponse, error) {
|
||||
return nil, status.Errorf(codes.Unimplemented, "method OcrFile not implemented")
|
||||
}
|
||||
// mustEmbedUnimplementedThumbnailServiceServer is a no-op that supplies the
// marker method required by the ThumbnailServiceServer interface.
func (UnimplementedThumbnailServiceServer) mustEmbedUnimplementedThumbnailServiceServer() {}
|
||||
// testEmbeddedByValue is a no-op marker method.
// NOTE(review): presumably checked by the service registration code to detect
// embedding by pointer rather than by value; that code is not visible here.
func (UnimplementedThumbnailServiceServer) testEmbeddedByValue() {}
|
||||
|
||||
@@ -108,6 +124,24 @@ func _ThumbnailService_GenerateThumbnail_Handler(srv interface{}, ctx context.Co
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
func _ThumbnailService_OcrFile_Handler(srv interface{}, ctx context.Context, dec func(interface{}) error, interceptor grpc.UnaryServerInterceptor) (interface{}, error) {
|
||||
in := new(OCRFileRequest)
|
||||
if err := dec(in); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if interceptor == nil {
|
||||
return srv.(ThumbnailServiceServer).OcrFile(ctx, in)
|
||||
}
|
||||
info := &grpc.UnaryServerInfo{
|
||||
Server: srv,
|
||||
FullMethod: ThumbnailService_OcrFile_FullMethodName,
|
||||
}
|
||||
handler := func(ctx context.Context, req interface{}) (interface{}, error) {
|
||||
return srv.(ThumbnailServiceServer).OcrFile(ctx, req.(*OCRFileRequest))
|
||||
}
|
||||
return interceptor(ctx, in, info, handler)
|
||||
}
|
||||
|
||||
// ThumbnailService_ServiceDesc is the grpc.ServiceDesc for ThumbnailService service.
|
||||
// It's only intended for direct use with grpc.RegisterService,
|
||||
// and not to be introspected or modified (even as a copy)
|
||||
@@ -119,6 +153,10 @@ var ThumbnailService_ServiceDesc = grpc.ServiceDesc{
|
||||
MethodName: "GenerateThumbnail",
|
||||
Handler: _ThumbnailService_GenerateThumbnail_Handler,
|
||||
},
|
||||
{
|
||||
MethodName: "OcrFile",
|
||||
Handler: _ThumbnailService_OcrFile_Handler,
|
||||
},
|
||||
},
|
||||
Streams: []grpc.StreamDesc{},
|
||||
Metadata: "thumbnail.proto",
|
||||
|
||||
Reference in New Issue
Block a user