|
@@ -67,8 +67,10 @@ class Pdf(Base):
|
|
for i, b in enumerate(blocks):
|
|
for i, b in enumerate(blocks):
|
|
y_top = b["bbox"][1]
|
|
y_top = b["bbox"][1]
|
|
y_bottom = b["bbox"][3]
|
|
y_bottom = b["bbox"][3]
|
|
|
|
+ '''
|
|
if y_top < page_height * 0.02 or y_bottom > page_height * 0.98:
|
|
if y_top < page_height * 0.02 or y_bottom > page_height * 0.98:
|
|
continue
|
|
continue
|
|
|
|
+ '''
|
|
|
|
|
|
if b['type'] == 0:
|
|
if b['type'] == 0:
|
|
text_content = ""
|
|
text_content = ""
|
|
@@ -101,8 +103,10 @@ class Pdf(Base):
|
|
|
|
|
|
elif b['type'] == 1:
|
|
elif b['type'] == 1:
|
|
image_bytes = b.get("image", b"")
|
|
image_bytes = b.get("image", b"")
|
|
|
|
+ '''
|
|
if not image_bytes or len(image_bytes) < 100:
|
|
if not image_bytes or len(image_bytes) < 100:
|
|
continue
|
|
continue
|
|
|
|
+ '''
|
|
|
|
|
|
image_ext = "png"
|
|
image_ext = "png"
|
|
image_file = f"{self.param['path']}page{page_num+1}_img_{i}.{image_ext}"
|
|
image_file = f"{self.param['path']}page{page_num+1}_img_{i}.{image_ext}"
|