RoboFlow视觉 | Newbieking's Blog

RoboFlow下载权重模型文件

Roboflow 的 free plan 不支持下载其他用户的权重文件（weights file）；
自己微调的模型也只能通过 Inference deployment 调用；
只有 enterprise plan 才支持将模型文件下载到本地。

referenced from: How to download the trained model

RoboFlow 返回的预测结果结构

举个例子：

{'inference_id': '6dc66336-a17b-47a9-bf78-4aa0e6d234b7', 'time': 0.053830977998586604, 'image': {'width': 640, 'height': 640}, 'predictions': [{'x': 500.5, 'y': 444.5, 'width': 31.0, 'height': 9.0, 'confidence': 0.9422910213470459, 'class': 'Tax_Precentage', 'class_id': 8, 'detection_id': '66d21019-2099-48f4-9140-bfac54e44237'}, {'x': 337.0, 'y': 350.0, 'width': 528.0, 'height': 124.0, 'confidence': 0.9179074764251709, 'class': 'Products', 'class_id': 4, 'detection_id': 'f74f11de-77d1-481e-b21c-f24875e20110'}, {'x': 567.5, 'y': 427.5, 'width': 51.0, 'height': 11.0, 'confidence': 0.8719505071640015, 'class': 'Subtotal', 'class_id': 6, 'detection_id': 'a0efa767-58ab-4dcc-bb84-977cb1f7528e'}, {'x': 164.5, 'y': 215.5, 'width': 185.0, 'height': 15.0, 'confidence': 0.8281865119934082, 'class': 'billing address', 'class_id': 10, 'detection_id': 'ad0e30d4-ec34-4277-8e26-1bf56f142dc4'}, {'x': 492.0, 'y': 73.0, 'width': 42.0, 'height': 14.0, 'confidence': 0.808434247970581, 'class': 'invoice number', 'class_id': 12, 'detection_id': '5ee0cb2b-5fe1-45ac-a3b2-ef206bb55776'}, {'x': 568.0, 'y': 445.0, 'width': 50.0, 'height': 10.0, 'confidence': 0.8054320812225342, 'class': 'Tax', 'class_id': 7, 'detection_id': '20ef8f7f-283e-4cdb-9a3e-634ffb9c92c1'}, {'x': 570.5, 'y': 468.0, 'width': 57.0, 'height': 10.0, 'confidence': 0.7871297597885132, 'class': 'total', 'class_id': 14, 'detection_id': '51551e50-6331-41bc-bef9-4df0e9350239'}, {'x': 155.5, 'y': 191.0, 'width': 157.0, 'height': 12.0, 'confidence': 0.7358325719833374, 'class': 'Name_Client', 'class_id': 3, 'detection_id': '43cd1fbb-a0cc-4261-847e-7b1b2c808ae4'}, {'x': 516.0, 'y': 186.5, 'width': 166.0, 'height': 11.0, 'confidence': 0.7109659910202026, 'class': 'Email_Client', 'class_id': 2, 'detection_id': '8c7a9807-d269-49ef-ad72-a9907e5a59a8'}, {'x': 120.5, 'y': 202.5, 'width': 93.0, 'height': 11.0, 'confidence': 0.6150053143501282, 'class': 'Tel_Client', 'class_id': 9, 'detection_id': '723552bf-6a14-4f7d-a9de-6cd2294f8755'}, 
{'x': 532.0, 'y': 60.5, 'width': 118.0, 'height': 13.0, 'confidence': 0.4191111922264099, 'class': 'Due_Date', 'class_id': 1, 'detection_id': '68516365-1a8a-4145-bb3d-746b6698fade'}, {'x': 531.0, 'y': 60.5, 'width': 116.0, 'height': 13.0, 'confidence': 0.4115637242794037, 'class': 'invoice date', 'class_id': 11, 'detection_id': 'c3fd4696-27e2-4813-98d2-d1c9c436bf36'}]}

image: 图片的尺寸（width 和 height）
predictions: 目标检测（object detection）的预测结果列表，每一项包含位置坐标（x, y）、检测框的宽高（width, height）、置信度（confidence）、类别信息（class, class_id）等

注意：结果中的 {x, y} 是检测框的中心坐标，而非左上角坐标；左上角坐标为 (x - width / 2, y - height / 2)。

附上一个识别发票内容的demo

invoice

import os  
  
from inference_sdk import InferenceHTTPClient  
  
# Alternative usage examples (rock-paper-scissors model), kept for reference.
# NOTE: do not hard-code the API key in source; read it from the environment.
#
# CLIENT = InferenceHTTPClient(
#     api_url="https://detect.roboflow.com",
#     api_key=os.getenv("API_KEY")
# )
#
# result = CLIENT.infer("./data/scissors.jpg", model_id="rock-paper-scissors-sxsw/14")
# print(result["predictions"][0]["class"])
#
# from inference import get_model
#
# model = get_model("rock-paper-scissors-sxsw/14", api_key=os.getenv("API_KEY"))
# # image = "https://website.com/my-image" # or PIL.Image or numpy array
# results = model.infer("./data/scissors.jpg")[0]
# print(results)
#
  
from inference_sdk import InferenceHTTPClient  
  
from dotenv import load_dotenv  
  
# Load variables from a local .env file into the process environment so the
# API key does not have to be written into the script.
load_dotenv()

# Fail fast with a clear message instead of sending a request without a key.
api_key = os.getenv("API_KEY")
if not api_key:
    raise RuntimeError(
        "API_KEY is not set; define it in the environment or in a .env file"
    )

CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=api_key,
)

# Run the invoice detection model on a local image and show the raw response.
img_path = "./data/2.jpg"
result = CLIENT.infer(img_path, model_id="invoice-5wfdh/1")
print(result)
  
import cv2  
import numpy as np  
import matplotlib.pyplot as plt  
import pytesseract  
from PIL import Image  
  
  
# Image size as reported in the inference response.
image_width = result['image']['width']
image_height = result['image']['height']

img = cv2.imread(img_path)
if img is None:
    # cv2.imread reports an unreadable/missing file by returning None.
    raise FileNotFoundError(f"Could not read image: {img_path}")

# Crops for OCR are taken from an untouched copy, so rectangles and labels
# drawn on `img` for earlier predictions never end up inside a later crop.
source = img.copy()
source_height, source_width = source.shape[:2]


def _clamp(value, upper):
    """Limit *value* to the range [0, upper] so slicing never wraps around."""
    return max(0, min(value, upper))


# One {"class", "text"} entry per prediction, in response order.
detected_object = []

for prediction in result["predictions"]:
    # x/y are the centre of the detection box, not its top-left corner.
    x, y, w, h = int(prediction['x']), int(prediction['y']), int(prediction['width']), int(prediction['height'])
    confidence = prediction['confidence']
    class_name = prediction['class']

    # Box corners derived from centre and size.
    left, top = int(x - w / 2), int(y - h / 2)
    right, bottom = int(x + w / 2), int(y + h / 2)

    # Clamp before slicing: a negative index would silently select the wrong
    # region (NumPy counts it from the end of the axis).
    crop = source[
        _clamp(top, source_height): _clamp(bottom, source_height),
        _clamp(left, source_width): _clamp(right, source_width),
    ]

    if crop.size:
        # OpenCV loads images as BGR; PIL/pytesseract expect RGB.
        rgb_crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
        text = pytesseract.image_to_string(Image.fromarray(rgb_crop))
    else:
        # Box lies entirely outside the image: nothing to recognise.
        text = ""

    detected_object.append({
        "class": class_name,
        "text": text
    })

    # Annotate the output image with the box and "<class>: <confidence>".
    cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0))
    label = f"{class_name}: {confidence:.2f}"
    cv2.putText(img, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

print(detected_object)

# cv2.imwrite does not create missing directories and reports failure through
# its return value, so create the folder first and check the result.
out_path = "./out/1.detect.jpg"
os.makedirs(os.path.dirname(out_path), exist_ok=True)
if not cv2.imwrite(out_path, img):
    raise OSError(f"Could not write annotated image: {out_path}")