RoboFlow下载权重模型文件
- roboflow的free plan不支持下载其他用户的weights file
- 自己只能在Inference deployment上使用自己微调的模型
- enterprise plan可以支持本地下载模型文件
referenced from: How to download the trained model
RoboFlow 返回的预测结果结构
举个例子:
{'inference_id': '6dc66336-a17b-47a9-bf78-4aa0e6d234b7', 'time': 0.053830977998586604, 'image': {'width': 640, 'height': 640}, 'predictions': [{'x': 500.5, 'y': 444.5, 'width': 31.0, 'height': 9.0, 'confidence': 0.9422910213470459, 'class': 'Tax_Precentage', 'class_id': 8, 'detection_id': '66d21019-2099-48f4-9140-bfac54e44237'}, {'x': 337.0, 'y': 350.0, 'width': 528.0, 'height': 124.0, 'confidence': 0.9179074764251709, 'class': 'Products', 'class_id': 4, 'detection_id': 'f74f11de-77d1-481e-b21c-f24875e20110'}, {'x': 567.5, 'y': 427.5, 'width': 51.0, 'height': 11.0, 'confidence': 0.8719505071640015, 'class': 'Subtotal', 'class_id': 6, 'detection_id': 'a0efa767-58ab-4dcc-bb84-977cb1f7528e'}, {'x': 164.5, 'y': 215.5, 'width': 185.0, 'height': 15.0, 'confidence': 0.8281865119934082, 'class': 'billing address', 'class_id': 10, 'detection_id': 'ad0e30d4-ec34-4277-8e26-1bf56f142dc4'}, {'x': 492.0, 'y': 73.0, 'width': 42.0, 'height': 14.0, 'confidence': 0.808434247970581, 'class': 'invoice number', 'class_id': 12, 'detection_id': '5ee0cb2b-5fe1-45ac-a3b2-ef206bb55776'}, {'x': 568.0, 'y': 445.0, 'width': 50.0, 'height': 10.0, 'confidence': 0.8054320812225342, 'class': 'Tax', 'class_id': 7, 'detection_id': '20ef8f7f-283e-4cdb-9a3e-634ffb9c92c1'}, {'x': 570.5, 'y': 468.0, 'width': 57.0, 'height': 10.0, 'confidence': 0.7871297597885132, 'class': 'total', 'class_id': 14, 'detection_id': '51551e50-6331-41bc-bef9-4df0e9350239'}, {'x': 155.5, 'y': 191.0, 'width': 157.0, 'height': 12.0, 'confidence': 0.7358325719833374, 'class': 'Name_Client', 'class_id': 3, 'detection_id': '43cd1fbb-a0cc-4261-847e-7b1b2c808ae4'}, {'x': 516.0, 'y': 186.5, 'width': 166.0, 'height': 11.0, 'confidence': 0.7109659910202026, 'class': 'Email_Client', 'class_id': 2, 'detection_id': '8c7a9807-d269-49ef-ad72-a9907e5a59a8'}, {'x': 120.5, 'y': 202.5, 'width': 93.0, 'height': 11.0, 'confidence': 0.6150053143501282, 'class': 'Tel_Client', 'class_id': 9, 'detection_id': '723552bf-6a14-4f7d-a9de-6cd2294f8755'}, 
{'x': 532.0, 'y': 60.5, 'width': 118.0, 'height': 13.0, 'confidence': 0.4191111922264099, 'class': 'Due_Date', 'class_id': 1, 'detection_id': '68516365-1a8a-4145-bb3d-746b6698fade'}, {'x': 531.0, 'y': 60.5, 'width': 116.0, 'height': 13.0, 'confidence': 0.4115637242794037, 'class': 'invoice date', 'class_id': 11, 'detection_id': 'c3fd4696-27e2-4813-98d2-d1c9c436bf36'}]}
- image: 描述图片的大小
- predictions: object detection的预测结果列表, 每一项包含位置坐标、区域长宽、置信度、类别信息等
注意结果的{x, y}, 描述的是检测到的物体的中心坐标,而非左上角位置坐标
附上一个识别发票内容的demo
import os
from inference_sdk import InferenceHTTPClient
# CLIENT = InferenceHTTPClient(
# api_url="https://detect.roboflow.com",
# api_key="<YOUR_ROBOFLOW_API_KEY>"
# )
#
# result = CLIENT.infer("./data/scissors.jpg", model_id="rock-paper-scissors-sxsw/14")
# print(result["predictions"][0]["class"])
#
# from inference import get_model
#
# model = get_model("rock-paper-scissors-sxsw/14", api_key="<YOUR_ROBOFLOW_API_KEY>")
# # image = "https://website.com/my-image" # or PIL.Image or numpy array
# results = model.infer("./data/scissors.jpg")[0]
# print(results)
#
#
#
from inference_sdk import InferenceHTTPClient
from dotenv import load_dotenv
load_dotenv()
# Client for the Roboflow detection endpoint; the key is taken from the
# API_KEY environment variable so that it never appears in the source.
CLIENT = InferenceHTTPClient(
    api_url="https://detect.roboflow.com",
    api_key=os.environ.get("API_KEY"),
)

# Run the invoice model on one local image and dump the raw response.
img_path = "./data/2.jpg"
result = CLIENT.infer(img_path, model_id="invoice-5wfdh/1")
print(result)
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pytesseract
from PIL import Image
def _clamp(value, upper):
    """Limit *value* to the closed range [0, upper]."""
    return max(0, min(value, upper))


# Image size as reported by the inference response.
image_width = result['image']['width']
image_height = result['image']['height']

img = cv2.imread(img_path)
if img is None:
    # cv2.imread reports failure by returning None instead of raising.
    raise FileNotFoundError(f"Could not read image: {img_path}")

# Boxes and labels are drawn onto `img`; OCR crops are taken from this
# untouched copy so earlier annotations never leak into later crops.
clean_img = img.copy()
img_h, img_w = img.shape[:2]

detected_object = []
for prediction in result["predictions"]:
    x, y, w, h = int(prediction['x']), int(prediction['y']), int(prediction['width']), int(prediction['height'])
    confidence = prediction['confidence']
    class_name = prediction['class']

    # (x, y) is the centre of the detection, so derive the box corners.
    left, top = int(x - w / 2), int(y - h / 2)
    right, bottom = int(x + w / 2), int(y + h / 2)

    # Clamp before slicing: a negative bound would otherwise be read by
    # NumPy as an offset from the end of the axis and crop the wrong area.
    crop = clean_img[_clamp(top, img_h):_clamp(bottom, img_h),
                     _clamp(left, img_w):_clamp(right, img_w)]
    if crop.size:
        # OpenCV stores pixels as BGR, while PIL/pytesseract expect RGB.
        rgb_crop = cv2.cvtColor(crop, cv2.COLOR_BGR2RGB)
        text = pytesseract.image_to_string(Image.fromarray(rgb_crop))
    else:
        # Box lies entirely outside the image; there is nothing to read.
        text = ""

    detected_object.append({
        "class": class_name,
        "text": text
    })

    cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0))
    label = f"{class_name}: {confidence:.2f}"
    cv2.putText(img, label, (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

# Optional preview of the annotated image:
# cv2.imshow("", cv2.resize(img, None, fx=0.5, fy=0.5))
# cv2.waitKey()
print(detected_object)

# cv2.imwrite does not create missing directories and only returns False
# on failure, so make sure the output folder exists first.
os.makedirs("./out", exist_ok=True)
cv2.imwrite("./out/1.detect.jpg", img)