|
@@ -30,17 +30,12 @@ class Pdf(Base):
|
|
words = page_obj.get_text("words")
|
|
words = page_obj.get_text("words")
|
|
for w in words:
|
|
for w in words:
|
|
x0, y0, x1, y1, text, *_ = w
|
|
x0, y0, x1, y1, text, *_ = w
|
|
- # 按 scale 缩放
|
|
|
|
- x0 *= scale
|
|
|
|
- y0 *= scale
|
|
|
|
- x1 *= scale
|
|
|
|
- y1 *= scale
|
|
|
|
# 转百分比,方便前端高亮
|
|
# 转百分比,方便前端高亮
|
|
rel_bbox = [
|
|
rel_bbox = [
|
|
- (x0 / (page_width * scale)) * 100,
|
|
|
|
- (y0 / (page_height * scale)) * 100,
|
|
|
|
- (x1 / (page_width * scale)) * 100,
|
|
|
|
- (y1 / (page_height * scale)) * 100,
|
|
|
|
|
|
+ (x0 / (page_width)) * 100,
|
|
|
|
+ (y0 / (page_height)) * 100,
|
|
|
|
+ (x1 / (page_width)) * 100,
|
|
|
|
+ (y1 / (page_height)) * 100,
|
|
]
|
|
]
|
|
result['text'].append({
|
|
result['text'].append({
|
|
"page": page,
|
|
"page": page,
|