Python 实现公式图像识别转 Latex（Mathpix）

本文是文本 OCR 的后续。因为用到了公式识别，所以阅读了 Mathpix API 文档，编写了一份比较适合自己使用的公式识别 Python 程序，具体代码如下。注意：使用之前应当去 Mathpix 官网申请开发者 ID 和 KEY，它们对应代码中 APP_ID 和 APP_KEY 后的 XXX。我在代码中加入了使用次数的限制，但是需要手动新建一个 ./count.txt 文件用于初始化使用次数；当然，在个人信息页的 Usage 下也可以看到 API 的调用情况，包括时间和次数。下面是代码实现，可以直接拷贝使用：

import os
import sys
import json
import time
import base64
import signal
import win32con
import winsound
import requests
from PIL import ImageGrab
import win32clipboard as wc

def set_clipboard(txt):
    """Replace the Windows clipboard contents with the Unicode text *txt*.

    Args:
        txt: The string to place on the clipboard as CF_UNICODETEXT.
    """
    wc.OpenClipboard()
    try:
        wc.EmptyClipboard()
        wc.SetClipboardData(win32con.CF_UNICODETEXT, txt)
    finally:
        # Always release the clipboard, even if emptying/setting fails,
        # so it is not left open.
        wc.CloseClipboard()

# Process environment; the Mathpix credentials are read from it below.
env = os.environ

# HTTP headers sent with every request. The credentials come from the
# APP_ID / APP_KEY environment variables and fall back to the 'XXX'
# placeholder when a variable is not set.
default_headers = {
    header: env.get(variable, 'XXX')
    for header, variable in (('app_id', 'APP_ID'), ('app_key', 'APP_KEY'))
}
default_headers['Content-type'] = 'application/json'

# Endpoint that latex() posts to.
service = 'https://api.mathpix.com/v3/latex'

# Result formats requested from the service in every call.
format_set = [
    "text",
    "latex_simplified",
    "latex_styled",
    "mathml",
    "asciimath",
    "latex_list",
]

# Formats tried, in priority order, when picking the text that is copied
# to the clipboard.
format_set_output = [
    "latex_styled",
    "latex_simplified",
    "text",
]

# Number of recognitions performed so far (persisted in ./count.txt).
count = 0

def changeCount(number):
    """Persist the usage counter by overwriting ./count.txt.

    Args:
        number: The counter value; written as ``str(number)``.
    """
    # The context manager closes the file even if the write fails.
    with open("./count.txt", "w") as filehandle:
        filehandle.write(str(number))

def getCount():
    """Read the persisted usage counter from ./count.txt.

    Returns:
        The stored integer, or 0 when the file does not exist or is empty
        (for example a freshly hand-created file).

    Raises:
        ValueError: If the file contains text that is not an integer.
    """
    if not os.path.exists("./count.txt"):
        return 0
    # The context manager closes the file even if reading fails.
    with open("./count.txt", "r") as filehandle:
        content = filehandle.read().strip()
    # int("") would raise, so treat an empty file as "never used".
    return int(content) if content else 0

def image_uri(filename):
    """Return the contents of *filename* as a base64 ``data:`` URI.

    Args:
        filename: Path of the image file to encode.

    Returns:
        A string of the form ``data:image/jpg;base64,<payload>``. The
        ``image/jpg`` label is fixed regardless of the actual file type.
    """
    # Read inside a context manager so the handle is closed promptly.
    with open(filename, "rb") as image_file:
        image_data = image_file.read()
    return "data:image/jpg;base64," + base64.b64encode(image_data).decode()

def latex(args, headers=default_headers, timeout=30):
    """POST *args* as JSON to the recognition service and decode the reply.

    Args:
        args: JSON-serialisable request payload.
        headers: HTTP headers for the request; defaults to ``default_headers``.
        timeout: Timeout in seconds passed to ``requests.post``.

    Returns:
        The response body parsed from JSON.
    """
    payload = json.dumps(args)
    response = requests.post(
        service,
        data=payload,
        headers=headers,
        timeout=timeout,
    )
    return json.loads(response.text)

def sig_handler(signum, frame):
    """Signal handler that ends the program with exit status 0.

    Args:
        signum: Number of the received signal (unused).
        frame: Stack frame active when the signal arrived (unused).

    Raises:
        SystemExit: Always, with code 0.
    """
    raise SystemExit(0)

""" 截图后,调用Mathpix 公式识别"""
def LatexOcrScreenshots(path="./", ifauto=False):
    """Recognise a formula screenshot from the clipboard and copy the result back.

    Grabs an image from the clipboard, saves it as a temporary PNG under
    *path*, sends it to the recognition service via ``latex()``, and puts
    the first available format from ``format_set_output`` on the clipboard
    wrapped in ``$$`` lines. The usage counter is incremented and persisted
    before the request is made.

    Args:
        path: Directory for the temporary PNG; created if it does not exist.
        ifauto: True when called repeatedly from the polling loop; only
            changes how the "waiting for screenshot" prompt is printed and
            what is returned when no image is available.

    Returns:
        The decoded service response when an image was processed; ``""``
        when no image was available and *ifauto* is false; ``None``
        otherwise (no image in auto mode, or usage cap reached).
    """
    # NOTE(review): the escape sequences in the status messages were lost in
    # the published listing; "\r" is reconstructed here because the messages
    # are padded with spaces and re-printed by the polling loop — confirm.
    global count
    if count >= 1000:  # hard-coded usage cap
        print("\rThe maximum number of uses has been reached!")
        changeCount(count)
        return

    os.makedirs(path, exist_ok=True)
    image = ImageGrab.grabclipboard()
    # grabclipboard() may also return a list of file names when files were
    # copied; that is not a screenshot, so treat it like "no image".
    if image is None or isinstance(image, list):
        if not ifauto:
            print("Count : ",count," Please get the screenshots by Shift+Win+S!",end="")
            return ""
        print("\rCount : ",count," Please get the screenshots by Shift+Win+S!",end="")
        return None

    count += 1
    changeCount(count)
    print("\rThe image has been obtained. Please wait a moment!               ",end=" ")
    # os.path.join keeps this working when *path* has no trailing separator.
    image_path = os.path.join(path, str(time.time_ns()) + ".png")
    try:
        image.save(image_path)
        txt = latex({
            'src': image_uri(image_path),
            "ocr": ["math", "text"],
            "skip_recrop": True,
            "formats": format_set
        })
    finally:
        # Remove the temporary file even when saving or the request fails.
        if os.path.exists(image_path):
            os.remove(image_path)

    # Pick the highest-priority format present in the response; an empty
    # string clears the clipboard when none is available, which also stops
    # the same screenshot from being processed again.
    clipboard_text = ""
    for format_text in format_set_output:
        if format_text in txt:
            clipboard_text = "$$\n" + txt[format_text] + "\n$$"
            break
    set_clipboard(clipboard_text)
    winsound.PlaySound('SystemAsterisk', winsound.SND_ASYNC)
    return txt

def AutoOcrScreenshotsLatex():
    """Poll the clipboard for screenshots and recognise each one until interrupted.

    Loads the persisted usage counter, installs ``sig_handler`` for SIGINT
    and SIGTERM, then calls ``LatexOcrScreenshots(ifauto=True)`` every 0.1
    seconds. When the handler raises ``SystemExit`` the final counter is
    printed and saved, and the function returns.
    """
    global count
    count = getCount()
    signal.signal(signal.SIGINT, sig_handler)
    signal.signal(signal.SIGTERM, sig_handler)
    print("Count : ",count," Please get the screenshots by Shift+Win+S !",end="")
    while True:
        try:
            LatexOcrScreenshots(ifauto=True)
            time.sleep(0.1)
        except SystemExit:
            # NOTE(review): the escape here was lost in the published
            # listing; "\n" is assumed so the summary starts on a new line.
            print("\nLast Count : ",count)
            changeCount(count)
            return

# Script entry point: start the clipboard polling loop.
if __name__ == '__main__':
    AutoOcrScreenshotsLatex()

可以看出，其与百度 API 不同的地方是：直接向接口地址发送 POST 请求便可以获取 OCR 内容。具体获取的内容由 format_set 决定，而输出内容的优先级由 format_set_output 决定。

任世事无常，勿忘初心