rabin 2 hónapja
szülő
commit
a2f6e6949e
1 módosított fájl, 4 hozzáadás és 0 törlés
  1. service/extract/docs/pdf.py (+4 -0)

+ 4 - 0
service/extract/docs/pdf.py

@@ -67,8 +67,10 @@ class Pdf(Base):
             for i, b in enumerate(blocks):
                 y_top = b["bbox"][1]
                 y_bottom = b["bbox"][3]
+                '''
                 if y_top < page_height * 0.02 or y_bottom > page_height * 0.98:
                     continue
+                '''
 
                 if b['type'] == 0:
                     text_content = ""
@@ -101,8 +103,10 @@ class Pdf(Base):
 
                 elif b['type'] == 1:
                     image_bytes = b.get("image", b"")
+                    '''
                     if not image_bytes or len(image_bytes) < 100:
                         continue
+                    '''
 
                     image_ext = "png"
                     image_file = f"{self.param['path']}page{page_num+1}_img_{i}.{image_ext}"