docker run --name ppok -v /home/app/paddle:/home/paddle -itd registry.baidubce.com/paddlepaddle/paddle:2.5.2
docker exec -it ppok /bin/bash
pip3 install "paddleocr==2.5.0"
pip3 install "paddleclas>=2.4.3"
pip uninstall PyMuPDF
pip install PyMuPDF==1.18.14
https://www.paddlepaddle.org.cn/documentation/docs/zh/2.5/install/docker/linux-docker.html
https://paddlepaddle.github.io/PaddleOCR/latest/ppstructure/model_train/recovery_to_doc.html#1
//第三方
RapidOcr
docker exec ppok paddleocr --image_dir /home/paddle/m7.pdf --use_angle_cls true --use_gpu false --page_num 4
自然语言处理可用版本
docker run -p 9091:9090 --name aocr --env USER_PASSWD="123456" -v /home/paddle:/home/paddle -itd registry.baidubce.com/paddlepaddle/paddle:2.2.2-jupyter
docker exec -it aocr /bin/bash
pip install paddlenlp==2.3.5
pip install datasets==2.21.0
可用
信息抽取
python3 text_information_extraction.py
问答
python3 test_question_answering.py
https://aistudio.baidu.com/projectdetail/6626468
https://www.cnblogs.com/myqs-java/p/18357187
PPOCR支持
docker run -p 9090:9090 --name bocr --env USER_PASSWD="123456" -v /home/paddle:/home/paddle -itd registry.baidubce.com/paddlepaddle/paddle:2.5.2-jupyter
docker exec -it bocr /bin/bash
pip install "paddleocr==2.7.3"
apt update
apt install libgl1-mesa-glx -y
apt-get install libglib2.0-0 -y
paddleocr --image_dir /home/paddle/m6.png --use_angle_cls true --use_gpu false
paddleocr --image_dir=/home/paddle/m6.png --type=structure --recovery=true --output=/home/paddle/output
paddleocr --image_dir=/home/paddle/m6.pdf --type=structure --recovery=true --output=/home/paddle/output
mineru OCR
docker pull registry.cn-beijing.aliyuncs.com/quincyqiang/mineru:0.2-models
docker run -itd --name=mineru --gpus=all -p 14050:8000 registry.cn-beijing.aliyuncs.com/quincyqiang/mineru:0.2-models