rabin 2 månader sedan
förälder
incheckning
6733930a09
7 ändrade filer med 140 tillägg och 108 borttagningar
  1. 0 1
      admin.py
  2. 0 23
      check.py
  3. 135 0
      diviner.py
  4. 0 1
      front.py
  5. 0 25
      install.py
  6. 0 56
      master_cron.py
  7. 5 2
      service/extract/docs/pdf.py

+ 0 - 1
admin.py

@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
     demeter web

+ 0 - 23
check.py

@@ -1,23 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-import time
-import os
-timeSleep = 15
-
-def popen(command, bg=False):
-        string = command
-        if bg == True:
-                command = command + ' &'
-        process = os.popen(command)
-        output = process.read()
-        process.close()
-        return output
-
-def process():
-        command = 'python /data/dm/container/web/master_cron.py 1>/dev/null 2>&1 &'
-        check = 'ps -ef | grep master_cron.py | grep -v grep | wc -l'
-        value = int(popen(check))
-        if value == 0:
-                popen(command)
-
-process()

+ 135 - 0
diviner.py

@@ -0,0 +1,135 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+import os
+import sys
+import subprocess
+from demeter.core import *
+
+def init():
+    """Seed the default admin account and admin role when they are missing.
+
+    For each of the ``manage_admin`` and ``manage_role`` models, ``id`` is
+    set to 1 and a single row is selected; when nothing comes back, the
+    default record is filled in and inserted.
+    """
+    # NOTE(review): the default credentials below are hard-coded; presumably
+    # they are meant to be changed after the first login -- confirm.
+    admin = Demeter.model('manage_admin')
+    admin.id = 1
+    if not admin.select(type='fetchone'):
+        admin.role_id = 1
+        admin.username = 'admin'
+        admin.mobile = '15810090845'
+        admin.password = '123456'
+        admin.insert()
+
+    role = Demeter.model('manage_role')
+    role.id = 1
+    if not role.select(type='fetchone'):
+        role.name = u'管理员'
+        role.insert()
+
+def get_service_name(script_name):
+    """Return the service name for a script: ``front.py`` -> ``diviner_front``.
+
+    Only a trailing ``.py`` is stripped. A blanket ``str.replace`` would also
+    delete ``.py`` occurring in the middle of the name and mangle it.
+
+    Args:
+        script_name: Script file name, with or without the ``.py`` suffix.
+
+    Returns:
+        The name prefixed with ``diviner_`` and without the ``.py`` suffix.
+    """
+    base = script_name[:-3] if script_name.endswith(".py") else script_name
+    return 'diviner_' + base
+
+def install(script_name):
+    """Seed default data and link the script into ``/usr/local/bin``.
+
+    Runs ``init()``, then (via ``sudo``) replaces any existing link named
+    after the script without its extension, points it at the script's
+    absolute path and marks the script executable.
+
+    Args:
+        script_name: Path of the script to install.
+
+    Raises:
+        subprocess.CalledProcessError: If any ``sudo`` command fails.
+    """
+    init()
+    bin_path = '/usr/local/bin'
+    target_path = os.path.abspath(script_name)
+    # Derive the command name once so the link and the usage hint agree.
+    command_name = os.path.splitext(os.path.basename(script_name))[0]
+    link_path = os.path.join(bin_path, command_name)
+
+    Demeter.echo(f"安装脚本到 {link_path}")
+    # lexists() also reports dangling symlinks; exists() follows the link,
+    # misses them, and `ln -s` would then fail because the name is taken.
+    if os.path.lexists(link_path):
+        subprocess.run(['sudo', 'rm', '-f', link_path], check=True)
+    subprocess.run(['sudo', 'ln', '-s', target_path, link_path], check=True)
+    subprocess.run(['sudo', 'chmod', '+x', target_path], check=True)
+    Demeter.echo(f"安装完成,可以直接用命令 `{command_name} start` 启动")
+
+def start(script_name, desc):
+    """Write a systemd unit for the script, then enable and restart it.
+
+    The unit runs ``python3 <cwd>/<script_name>`` as the current user with
+    the current working directory as ``WorkingDirectory``. It is staged in a
+    temporary file and moved into ``/etc/systemd/system`` with ``sudo``.
+
+    Args:
+        script_name: Script file name, relative to the current directory.
+        desc: Text used for the unit's ``Description``.
+
+    Raises:
+        subprocess.CalledProcessError: If any ``sudo`` command fails.
+    """
+    import shutil
+    import tempfile
+
+    template = """[Unit]
+Description={desc}
+After=network.target
+
+[Service]
+ExecStart={python_path} {workdir}/{script}
+WorkingDirectory={workdir}
+Restart=always
+RestartSec=3
+User={user}
+Environment=PYTHONUNBUFFERED=1
+
+[Install]
+WantedBy=multi-user.target
+"""
+    workdir = os.getcwd()
+    # Resolve the interpreter without a shell. Fall back to the running
+    # interpreter so a missing `python3` on PATH cannot put an empty string
+    # or a shell error message into ExecStart.
+    python_path = shutil.which("python3") or sys.executable
+    user = subprocess.getoutput("whoami")
+    service_name = get_service_name(script_name)
+    service_file = f"/etc/systemd/system/{service_name}.service"
+
+    content = template.format(
+        desc=desc,
+        python_path=python_path,
+        workdir=workdir,
+        script=script_name,
+        user=user,
+    )
+
+    # Stage the unit in a uniquely named temp file: a fixed, predictable
+    # path in a shared temp directory can be pre-created or symlinked by
+    # another local user.
+    fd, tmp_path = tempfile.mkstemp(prefix=f"{service_name}.", suffix=".service")
+    try:
+        with os.fdopen(fd, "w") as f:
+            f.write(content)
+        # mkstemp creates the file owner-only; make it world-readable like a
+        # file created with a plain open() under the usual umask.
+        os.chmod(tmp_path, 0o644)
+        # Move into the systemd directory (requires root privileges).
+        subprocess.run(["sudo", "mv", tmp_path, service_file], check=True)
+    finally:
+        # The staged file only still exists when the move did not happen.
+        if os.path.exists(tmp_path):
+            os.remove(tmp_path)
+
+    # Reload systemd, then enable and (re)start the service.
+    subprocess.run(["sudo", "systemctl", "daemon-reload"], check=True)
+    subprocess.run(["sudo", "systemctl", "enable", service_name], check=True)
+    subprocess.run(["sudo", "systemctl", "restart", service_name], check=True)
+
+    Demeter.echo(f"已安装并启动服务: {service_name}")
+
+def stop(script_name):
+    """Stop the script's systemd service, then disable it.
+
+    Both ``systemctl`` calls go through ``sudo`` and raise
+    ``subprocess.CalledProcessError`` on a non-zero exit status.
+    """
+    unit = get_service_name(script_name)
+    Demeter.echo(f"停止服务: {unit}")
+    for verb in ("stop", "disable"):
+        subprocess.run(["sudo", "systemctl", verb, unit], check=True)
+
+def restart(script_name):
+    """Restart the script's systemd service through ``sudo systemctl``.
+
+    Raises ``subprocess.CalledProcessError`` when the command exits non-zero.
+    """
+    unit = get_service_name(script_name)
+    Demeter.echo(f"重启服务: {unit}")
+    command = ["sudo", "systemctl", "restart", unit]
+    subprocess.run(command, check=True)
+
+def main():
+    """Command-line entry point: ``install``, ``start``, ``stop`` or ``restart``.
+
+    The action is the first command-line argument (case-insensitive) and
+    defaults to ``start``. ``install`` installs this script itself; the
+    other actions are applied to every known service script that exists in
+    the current directory. An unknown action exits with status 1.
+    """
+    services = [
+        ("front.py", "Diviner Front Service"),
+        ("admin.py", "Diviner Admin Service"),
+        ("cron.py", "Diviner Cron Service"),
+    ]
+
+    action = sys.argv[1].lower() if len(sys.argv) > 1 else "start"
+
+    if action == "install":
+        this_script = os.path.basename(sys.argv[0])
+        if not os.path.exists(this_script):
+            Demeter.echo(f"当前脚本文件不存在: {this_script}")
+            sys.exit(1)
+        install(this_script)
+        return
+
+    # Keep the scripts that are present and report the ones that are not.
+    available = []
+    for name, description in services:
+        if os.path.exists(name):
+            available.append((name, description))
+        else:
+            Demeter.echo(f"脚本不存在: {name}")
+
+    # Every runner takes (script, desc) so the loop below stays uniform.
+    runners = {
+        "start": lambda script, desc: start(script, desc),
+        "stop": lambda script, desc: stop(script),
+        "restart": lambda script, desc: restart(script),
+    }
+    runner = runners.get(action)
+    if runner is None:
+        Demeter.echo(f"未知动作: {action}")
+        sys.exit(1)
+
+    for name, description in available:
+        runner(name, description)
+
+if __name__ == "__main__":
+    main()
+

+ 0 - 1
front.py

@@ -1,4 +1,3 @@
-#!/usr/bin/env python
 # -*- coding: utf-8 -*-
 """
     demeter web

+ 0 - 25
install.py

@@ -1,25 +0,0 @@
-# -*- coding: utf-8 -*-
-from demeter.core import *
-
-#CREATE DATABASE IF NOT EXISTS yourdbname DEFAULT CHARSET utf8 COLLATE utf8_general_ci;
-def manage():
-	model = Demeter.model('manage_admin')
-	model.id = 1
-	info = model.select(type='fetchone')
-	if not info:
-		model.role_id = 1
-		model.username = 'admin'
-		model.mobile = '15810090845'
-		model.password = '123456'
-		model.insert()
-
-	model = Demeter.model('manage_role')
-	model.id = 1
-	info = model.select(type='fetchone')
-	if not info:
-		model.name = u'管理员'
-		model.insert()
-
-manage()
-
-Demeter.echo('install success!')

+ 0 - 56
master_cron.py

@@ -1,56 +0,0 @@
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# 用于批量处理转换 运行多个转换容器
-# nohup python /data/dm/container/web/master.py 2>/dev/null &
-import time
-import subprocess
-import os
-import pprint
-timeSleep = 120
-
-def redis():
-        import redis
-        host = '0.0.0.0'
-        port = 6379
-        password = 'dm_redis_123'
-        pool = redis.ConnectionPool(host=host, password=password, port=int(port))
-        return redis.Redis(connection_pool=pool)
-
-def command(file):
-        #return 'dm call office-convert_call id=' + file
-        return 'docker run -d -it --entrypoint python --rm -v /data/dm/container/share:/share -v /etc/hosts:/etc/hosts.main --env HOSTIP="172.30.0.6" --net=dm -v /data/dm/container/web:/www -v /data/dm/container/web/convert/static:/usr/local/convert/front/static -v /data/dm/container/web/convert/files:/usr/local/convert/runtime/files registry.cn-hangzhou.aliyuncs.com/shemic/convert  /usr/local/convert/convert.py -f ' + file
-
-def popen(command, bg=False):
-        string = command
-        if bg == True:
-                command = command + ' &'
-        process = os.popen(command)
-        output = process.read()
-        process.close()
-        return output
-
-# 文档转换
-def convert():
-        check = 'ps -ef | grep master_cron.py | grep -v grep | wc -l'
-        value = int(popen(check))
-        if value > 1:
-                return 1;
-        r = redis()
-        c = 'office_file'
-        i = 0
-        # r.ltrim("list2", 0, 1)
-        # n = r.llen(c)
-        while 1:
-                file = r.lpop(c)
-                if file:
-                        g = command(file)
-                        popen(g, False)
-                i = i+1
-                if i >= 10:
-                        time.sleep(timeSleep)
-                        i = 0
-
-def handle():
-        convert()
-
-handle()

+ 5 - 2
service/extract/docs/pdf.py

@@ -53,7 +53,7 @@ class Pdf(Base):
                             text_content += span["text"]
                         text_content += "\n"
                     text_content = text_content.strip()
-                    text_content = self.removeDomains(text_content)
+                    text_content = self.clean_text(self.removeDomains(text_content))
                     if text_content:
                         page_items.append({
                             "type": "text",
@@ -126,4 +126,7 @@ class Pdf(Base):
             full_text = text.strip() + "\n" + "\n".join(image_texts)
             document = langchain.schema.Document(page_content=full_text)
             result['content'].append(document)
-        return result
+        return result
+
+    def clean_text(self, s):
+        """Return ``s`` with ASCII control characters removed.
+
+        Tab, line feed and carriage return are kept; the other characters
+        in the range 0x00-0x1F and DEL (0x7F) are dropped.
+        """
+        control_chars = r'[\x00-\x08\x0B\x0C\x0E-\x1F\x7F]'
+        return re.compile(control_chars).sub('', s)