# vision-ocr/tests/test_api_ocr.py

# -*- coding: utf-8 -*-
"""
OCR API 测试
"""

import io

import pytest
from fastapi.testclient import TestClient


class TestOCRRecognizeMultipart:
    """Tests for the OCR recognition endpoint (multipart/form-data uploads)."""

    # Route shared by every test in this class.
    _ENDPOINT = "/api/v1/ocr/recognize"

    @staticmethod
    def _upload(filename: str, raw: bytes, content_type: str) -> dict:
        """Build the ``files`` mapping for a single-file multipart upload."""
        return {"file": (filename, io.BytesIO(raw), content_type)}

    def test_recognize_success(
        self,
        test_client: TestClient,
        sample_image_bytes: bytes,
    ):
        """Recognition succeeds for an upload declared as JPEG with options."""
        resp = test_client.post(
            self._ENDPOINT,
            files=self._upload("test.jpg", sample_image_bytes, "image/jpeg"),
            data={"lang": "ch", "drop_score": "0.5"},
        )

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True
        result = body["data"]
        assert result is not None
        # The result payload must expose each of these keys.
        for field in ("text_count", "text_blocks", "processing_time_ms"):
            assert field in result

    def test_recognize_with_roi(
        self,
        test_client: TestClient,
        sample_image_bytes: bytes,
    ):
        """Recognition succeeds when ROI form fields are supplied."""
        form_fields = {
            "lang": "ch",
            "roi_x": "0.1",
            "roi_y": "0.1",
            "roi_width": "0.8",
            "roi_height": "0.8",
        }
        resp = test_client.post(
            self._ENDPOINT,
            files=self._upload("test.jpg", sample_image_bytes, "image/jpeg"),
            data=form_fields,
        )

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True

    def test_recognize_with_annotated_image(
        self,
        test_client: TestClient,
        sample_image_bytes: bytes,
    ):
        """Recognition succeeds when an annotated image is requested."""
        resp = test_client.post(
            self._ENDPOINT,
            files=self._upload("test.jpg", sample_image_bytes, "image/jpeg"),
            data={"return_annotated_image": "true"},
        )

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True
        result = body["data"]
        # Note: the annotated image is only checked when there are
        # recognition results.
        if result["text_count"] > 0:
            assert result["annotated_image_base64"] is not None

    def test_recognize_png_image(
        self,
        test_client: TestClient,
        sample_png_bytes: bytes,
    ):
        """Recognition succeeds for an upload declared as PNG."""
        resp = test_client.post(
            self._ENDPOINT,
            files=self._upload("test.png", sample_png_bytes, "image/png"),
        )

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True

    def test_recognize_invalid_file(
        self,
        test_client: TestClient,
        invalid_file_bytes: bytes,
    ):
        """An invalid (text/plain) upload is reported as a failure with HTTP 200."""
        resp = test_client.post(
            self._ENDPOINT,
            files=self._upload("test.txt", invalid_file_bytes, "text/plain"),
        )

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is False
        assert body["error"] is not None

    def test_recognize_no_file(self, test_client: TestClient):
        """A request without any file is rejected with a validation error."""
        resp = test_client.post(self._ENDPOINT)

        # 422 = request validation error
        assert resp.status_code == 422


class TestOCRRecognizeBase64:
    """Tests for the OCR recognition endpoint (Base64 JSON body)."""

    # Route shared by every test in this class.
    _ENDPOINT = "/api/v1/ocr/recognize/base64"

    def test_recognize_base64_success(
        self,
        test_client: TestClient,
        sample_image_base64: str,
    ):
        """Recognition succeeds for a Base64 image with explicit options."""
        payload = {
            "image_base64": sample_image_base64,
            "lang": "ch",
            "drop_score": 0.5,
        }
        resp = test_client.post(self._ENDPOINT, json=payload)

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True
        assert body["data"] is not None

    def test_recognize_base64_with_data_url(
        self,
        test_client: TestClient,
        sample_image_base64: str,
    ):
        """Recognition succeeds when the image is sent as a data URL."""
        payload = {"image_base64": f"data:image/jpeg;base64,{sample_image_base64}"}
        resp = test_client.post(self._ENDPOINT, json=payload)

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True

    def test_recognize_base64_with_roi(
        self,
        test_client: TestClient,
        sample_image_base64: str,
    ):
        """Recognition succeeds when a nested ``roi`` object is supplied."""
        region = {"x": 0.1, "y": 0.1, "width": 0.8, "height": 0.8}
        payload = {"image_base64": sample_image_base64, "roi": region}
        resp = test_client.post(self._ENDPOINT, json=payload)

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True

    def test_recognize_base64_invalid(self, test_client: TestClient):
        """A malformed Base64 string is reported as a failure with HTTP 200."""
        payload = {"image_base64": "not-valid-base64!!!"}
        resp = test_client.post(self._ENDPOINT, json=payload)

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is False
        assert body["error"] is not None

    def test_recognize_base64_missing_field(self, test_client: TestClient):
        """Omitting the required ``image_base64`` field yields a validation error."""
        # The body deliberately lacks image_base64.
        payload = {"lang": "ch"}
        resp = test_client.post(self._ENDPOINT, json=payload)

        # 422 = request validation error
        assert resp.status_code == 422


class TestExpressMultipart:
    """Tests for the express waybill parsing endpoint (multipart/form-data)."""

    def test_express_success(
        self,
        test_client: TestClient,
        sample_image_bytes: bytes,
    ):
        """Parsing an uploaded image succeeds and returns the expected keys."""
        response = test_client.post(
            "/api/v1/ocr/express",
            files={"file": ("express.jpg", io.BytesIO(sample_image_bytes), "image/jpeg")},
        )

        assert response.status_code == 200
        data = response.json()
        assert data["success"] is True
        assert data["data"] is not None
        for key in ("express_info", "merged_text", "processing_time_ms"):
            assert key in data["data"]

    def test_express_info_structure(
        self,
        test_client: TestClient,
        sample_image_bytes: bytes,
    ):
        """The ``express_info`` object exposes the full expected structure.

        Checks the top-level keys and the contact fields of both the sender
        and the receiver.
        """
        response = test_client.post(
            "/api/v1/ocr/express",
            files={"file": ("express.jpg", io.BytesIO(sample_image_bytes), "image/jpeg")},
        )

        assert response.status_code == 200
        data = response.json()
        # Fail with a clear assertion instead of an opaque TypeError/KeyError
        # on the lookups below if the request was rejected.
        assert data["success"] is True
        assert data["data"] is not None
        express_info = data["data"]["express_info"]

        # Verify the top-level structure is complete.
        top_level_keys = (
            "tracking_number",
            "sender",
            "receiver",
            "courier_company",
            "confidence",
        )
        for key in top_level_keys:
            assert key in express_info, f"express_info is missing {key!r}"

        # Verify the sender AND receiver structure (previously only the
        # sender was actually checked).
        for party in ("sender", "receiver"):
            for field in ("name", "phone", "address"):
                assert field in express_info[party], f"{party} is missing {field!r}"


class TestExpressBase64:
    """Tests for the express waybill parsing endpoint (Base64 JSON body)."""

    def test_express_base64_success(
        self,
        test_client: TestClient,
        sample_image_base64: str,
    ):
        """Parsing a Base64 image succeeds and returns ``express_info``."""
        payload = {"image_base64": sample_image_base64}
        resp = test_client.post("/api/v1/ocr/express/base64", json=payload)

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is True
        result = body["data"]
        assert result is not None
        assert "express_info" in result


class TestSecurityValidation:
    """Security validation tests for uploads to the recognition endpoint."""

    # Route shared by every test in this class.
    _ENDPOINT = "/api/v1/ocr/recognize"

    def test_file_size_limit(self, test_client: TestClient):
        """An oversized upload is rejected with a size-related error message."""
        # Build an oversized fake file (11 MiB of filler bytes).
        oversized = b"x" * (11 * 1024 * 1024)
        upload = {"file": ("large.jpg", io.BytesIO(oversized), "image/jpeg")}

        resp = test_client.post(self._ENDPOINT, files=upload)

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is False
        # Accept either a Chinese or an English size-related message.
        message = body["error"]["message"]
        assert "大小" in message or "size" in message.lower()

    def test_invalid_extension(
        self,
        test_client: TestClient,
        sample_image_bytes: bytes,
    ):
        """An upload with a ``.exe`` file name is reported as a failure."""
        upload = {
            "file": (
                "test.exe",
                io.BytesIO(sample_image_bytes),
                "application/octet-stream",
            )
        }

        resp = test_client.post(self._ENDPOINT, files=upload)

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is False

    def test_magic_bytes_validation(self, test_client: TestClient):
        """A ``.jpg`` upload whose content is not an image is reported as a failure."""
        # Fake JPEG: the extension is correct but the content is plain text.
        bogus_jpeg = b"This is not a real JPEG file"
        upload = {"file": ("fake.jpg", io.BytesIO(bogus_jpeg), "image/jpeg")}

        resp = test_client.post(self._ENDPOINT, files=upload)

        assert resp.status_code == 200
        body = resp.json()
        assert body["success"] is False