first commit

2026-03-02 21:26:32 +08:00
commit e68bb3ac42
8 changed files with 3076 additions and 0 deletions
--- a/技术文档/技术方案.md
+++ b/技术文档/技术方案.md
@@ -0,0 +1,543 @@
+# AISee 技术实现方案
+
+## 系统架构设计
+
+### 整体架构图
+
+```
+┌─────────────────────────────────────────────────────────────┐
+│                        用户层                                │
+│                     AR 智能眼镜                              │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐     │
+│  │  摄像头模块  │  │  显示模块    │  │  传感器模块  │     │
+│  └──────────────┘  └──────────────┘  └──────────────┘     │
+└─────────────────────────────────────────────────────────────┘
+                            ↕ (蓝牙/WiFi)
+┌─────────────────────────────────────────────────────────────┐
+│                      移动端层                                │
+│                    手机 APP                                  │
+│  ┌──────────────────────────────────────────────────────┐  │
+│  │  UI 层：用户界面、设置、历史记录                      │  │
+│  ├──────────────────────────────────────────────────────┤  │
+│  │  业务层：图像处理、数据管理、设备通信                │  │
+│  ├──────────────────────────────────────────────────────┤  │
+│  │  数据层：本地缓存、数据库、网络请求                  │  │
+│  └──────────────────────────────────────────────────────┘  │
+└─────────────────────────────────────────────────────────────┘
+                            ↕ (HTTPS/WebSocket)
+┌─────────────────────────────────────────────────────────────┐
+│                      云端层                                  │
+│  ┌──────────────┐  ┌──────────────┐  ┌──────────────┐     │
+│  │  API 网关    │  │  负载均衡    │  │  CDN         │     │
+│  └──────────────┘  └──────────────┘  └──────────────┘     │
+│  ┌──────────────────────────────────────────────────────┐  │
+│  │              应用服务层                               │  │
+│  │  ┌────────────┐  ┌────────────┐  ┌────────────┐    │  │
+│  │  │ 图像服务   │  │ AI 服务    │  │ 用户服务   │    │  │
+│  │  └────────────┘  └────────────┘  └────────────┘    │  │
+│  └──────────────────────────────────────────────────────┘  │
+│  ┌──────────────────────────────────────────────────────┐  │
+│  │              AI 推理层                                │  │
+│  │  ┌────────────┐  ┌────────────┐  ┌────────────┐    │  │
+│  │  │ 物体识别   │  │ OCR 识别   │  │ 场景理解   │    │  │
+│  │  └────────────┘  └────────────┘  └────────────┘    │  │
+│  └──────────────────────────────────────────────────────┘  │
+│  ┌──────────────────────────────────────────────────────┐  │
+│  │              数据层                                   │  │
+│  │  ┌────────────┐  ┌────────────┐  ┌────────────┐    │  │
+│  │  │ PostgreSQL │  │ Redis      │  │ OSS        │    │  │
+│  │  └────────────┘  └────────────┘  └────────────┘    │  │
+│  └──────────────────────────────────────────────────────┘  │
+└─────────────────────────────────────────────────────────────┘
+```
+
+## 核心模块设计
+
+### 1. 图像采集与传输模块
+
+#### 眼镜端
+```python
+# 伪代码示例
+class CameraModule:
+    def __init__(self):
+        self.camera = Camera(resolution="1920x1080", fps=30)
+        self.encoder = H264Encoder()
+
+    def capture_frame(self):
+        """采集单帧图像"""
+        frame = self.camera.read()
+        return self.preprocess(frame)
+
+    def preprocess(self, frame):
+        """图像预处理"""
+        # 1. 调整分辨率（降低到 640x480 以减少传输）
+        frame = resize(frame, (640, 480))
+        # 2. 压缩质量优化
+        frame = compress(frame, quality=85)
+        return frame
+
+    def stream_to_phone(self):
+        """实时流传输"""
+        while True:
+            frame = self.capture_frame()
+            self.bluetooth.send(frame)
+            time.sleep(0.033)  # 30fps
+```
+
+#### 手机端接收
+```kotlin
+// Android 示例
+/**
+ * Reads image frames from the glasses' Bluetooth socket into a small bounded queue.
+ *
+ * @param bluetoothSocket connected socket whose input stream delivers the frames.
+ * @param onError invoked when reading from the socket fails; the receive loop ends afterwards.
+ */
+class ImageReceiver(
+    private val bluetoothSocket: BluetoothSocket,
+    private val onError: (java.io.IOException) -> Unit = {},
+) {
+    private val imageQueue = LinkedBlockingQueue<ByteArray>(10)
+
+    // Owned scope so the receive loop can be cancelled instead of leaking.
+    private val scope = CoroutineScope(SupervisorJob() + Dispatchers.IO)
+    private var receiveJob: Job? = null
+
+    /** Starts the background receive loop; does nothing if it is already running. */
+    fun startReceiving() {
+        if (receiveJob?.isActive == true) return
+        receiveJob = scope.launch {
+            val inputStream = bluetoothSocket.inputStream
+            try {
+                while (isActive) {
+                    val imageData = readImageData(inputStream)
+                    // When the queue is full, evict the oldest frame so consumers
+                    // always see the most recent images rather than stale ones.
+                    while (!imageQueue.offer(imageData)) {
+                        imageQueue.poll()
+                    }
+                }
+            } catch (e: java.io.IOException) {
+                onError(e)
+            }
+        }
+    }
+
+    /**
+     * Cancels the receive loop and discards queued frames.
+     *
+     * NOTE(review): cancellation is only observed between frames; presumably the
+     * socket must be closed to interrupt a read that is already blocked — confirm.
+     */
+    fun stopReceiving() {
+        receiveJob?.cancel()
+        receiveJob = null
+        imageQueue.clear()
+    }
+
+    /** Returns the next queued frame, waiting up to 100 ms; null if none arrived. */
+    fun getNextImage(): ByteArray? {
+        return imageQueue.poll(100, TimeUnit.MILLISECONDS)
+    }
+}
+```
+
+### 2. 手机 APP 架构
+
+#### 目录结构
+```
+app/
+├── data/
+│   ├── local/          # 本地数据源
+│   │   ├── database/   # Room 数据库
+│   │   └── cache/      # 图像缓存
+│   ├── remote/         # 远程数据源
+│   │   ├── api/        # API 接口
+│   │   └── websocket/  # WebSocket 连接
+│   └── repository/     # 数据仓库
+├── domain/
+│   ├── model/          # 领域模型
+│   ├── usecase/        # 业务用例
+│   └── repository/     # 仓库接口
+├── presentation/
+│   ├── main/           # 主界面
+│   ├── settings/       # 设置界面
+│   ├── history/        # 历史记录
+│   └── viewmodel/      # ViewModel
+└── device/
+    ├── bluetooth/      # 蓝牙通信
+    └── camera/         # 相机处理
+```
+
+#### 核心业务流程
+```kotlin
+/**
+ * Drives the capture pipeline: cache locally, upload, analyse, publish to the UI,
+ * then forward the result to the glasses.
+ *
+ * NOTE(review): [bluetoothManager] is assumed to be the project's own transport
+ * wrapper exposing `send(...)` — confirm the concrete type.
+ */
+class ImageProcessingViewModel @Inject constructor(
+    private val imageRepository: ImageRepository,
+    private val aiService: AIService,
+    private val bluetoothManager: BluetoothManager,
+) : ViewModel() {
+
+    private val _aiResult = MutableStateFlow<AIResult?>(null)
+
+    /** Latest analysis result; null until the first analysis completes. */
+    val aiResult: StateFlow<AIResult?> = _aiResult.asStateFlow()
+
+    /** Runs the whole pipeline for one image inside [viewModelScope]. */
+    fun processImage(imageData: ByteArray) {
+        viewModelScope.launch {
+            try {
+                // 1. Save to the local cache
+                val imageId = imageRepository.saveImage(imageData)
+
+                // 2. Upload to the server
+                val uploadResult = imageRepository.uploadImage(imageId, imageData)
+
+                // 3. Request AI analysis
+                val result = aiService.analyzeImage(uploadResult.url)
+
+                // 4. Update the UI
+                _aiResult.value = result
+
+                // 5. Send the result to the glasses
+                sendToGlasses(result)
+
+            } catch (e: CancellationException) {
+                // Cancellation is not an error: rethrow so the coroutine stops normally.
+                throw e
+            } catch (e: Exception) {
+                handleError(e)
+            }
+        }
+    }
+
+    /** Formats the result for AR display and sends it over Bluetooth. */
+    private suspend fun sendToGlasses(result: AIResult) {
+        val displayData = formatForAR(result)
+        bluetoothManager.send(displayData)
+    }
+}
+```
+
+### 3. 后端 API 设计
+
+#### 项目结构
+```
+backend/
+├── app/
+│   ├── api/
+│   │   ├── v1/
+│   │   │   ├── endpoints/
+│   │   │   │   ├── images.py      # 图像上传
+│   │   │   │   ├── analysis.py    # AI 分析
+│   │   │   │   └── users.py       # 用户管理
+│   │   │   └── router.py
+│   │   └── deps.py                # 依赖注入
+│   ├── core/
+│   │   ├── config.py              # 配置
+│   │   ├── security.py            # 安全
+│   │   └── celery_app.py          # 异步任务
+│   ├── models/
+│   │   ├── user.py
+│   │   ├── image.py
+│   │   └── analysis.py
+│   ├── schemas/
+│   │   ├── image.py               # Pydantic 模型
+│   │   └── analysis.py
+│   ├── services/
+│   │   ├── ai/
+│   │   │   ├── object_detection.py
+│   │   │   ├── ocr.py
+│   │   │   ├── scene_understanding.py
+│   │   │   └── model_manager.py
+│   │   ├── storage.py             # 对象存储
+│   │   └── cache.py               # 缓存服务
+│   └── main.py
+├── tests/
+├── requirements.txt
+└── Dockerfile
+```
+
+#### API 端点设计
+```python
+import asyncio
+from datetime import datetime, timezone
+
+from fastapi import BackgroundTasks, Depends, FastAPI, File, UploadFile, WebSocket, WebSocketDisconnect
+
+from app.schemas import AnalysisRequest, AnalysisResponse
+from app.services.ai import AIService
+
+# Application object; the route decorators below register their handlers on it.
+app = FastAPI(title="AISee API")
+
+@app.post("/api/v1/images/upload")
+async def upload_image(
+    file: UploadFile = File(...),
+    user_id: str = Depends(get_current_user)
+):
+    """上传图像"""
+    # 1. 验证图像格式
+    validate_image(file)
+
+    # 2. 保存到 OSS
+    image_url = await storage_service.upload(file)
+
+    # 3. 保存元数据到数据库
+    image_record = await db.images.create({
+        "user_id": user_id,
+        "url": image_url,
+        "uploaded_at": datetime.now()
+    })
+
+    return {"image_id": image_record.id, "url": image_url}
+
+@app.post("/api/v1/analysis/analyze", response_model=AnalysisResponse)
+async def analyze_image(
+    request: AnalysisRequest,
+    background_tasks: BackgroundTasks
+):
+    """AI 图像分析"""
+    # 1. 获取图像
+    image = await storage_service.download(request.image_url)
+
+    # 2. 并行执行多个 AI 任务
+    results = await asyncio.gather(
+        ai_service.detect_objects(image),
+        ai_service.recognize_text(image),
+        ai_service.understand_scene(image)
+    )
+
+    # 3. 合并结果
+    analysis_result = merge_results(results)
+
+    # 4. 异步保存到数据库
+    background_tasks.add_task(save_analysis, analysis_result)
+
+    return analysis_result
+
+@app.websocket("/ws/realtime")
+async def websocket_endpoint(websocket: WebSocket):
+    """实时分析 WebSocket"""
+    await websocket.accept()
+
+    try:
+        while True:
+            # 接收图像数据
+            data = await websocket.receive_bytes()
+
+            # 快速分析
+            result = await ai_service.quick_analyze(data)
+
+            # 返回结果
+            await websocket.send_json(result)
+    except WebSocketDisconnect:
+        pass
+```
+
+### 4. AI 推理服务设计
+
+#### 模型管理器
+```python
+class ModelManager:
+    def __init__(self):
+        self.models = {}
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+    def load_models(self):
+        """加载所有模型"""
+        # 物体检测模型
+        self.models['yolo'] = YOLO('yolov8n.pt').to(self.device)
+
+        # OCR 模型
+        self.models['ocr'] = PaddleOCR(use_angle_cls=True, lang='ch')
+
+        # 场景理解模型
+        self.models['clip'] = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+
+        # 多模态大模型
+        self.models['llm'] = load_qwen_vl_model()
+
+    def get_model(self, model_name: str):
+        return self.models.get(model_name)
+
+class AIService:
+    def __init__(self):
+        self.model_manager = ModelManager()
+        self.model_manager.load_models()
+
+    async def detect_objects(self, image: np.ndarray) -> List[Detection]:
+        """物体检测"""
+        model = self.model_manager.get_model('yolo')
+        results = model(image)
+
+        detections = []
+        for r in results:
+            boxes = r.boxes
+            for box in boxes:
+                detections.append({
+                    "class": box.cls,
+                    "confidence": box.conf,
+                    "bbox": box.xyxy.tolist(),
+                    "label": model.names[int(box.cls)]
+                })
+
+        return detections
+
+    async def recognize_text(self, image: np.ndarray) -> List[TextRegion]:
+        """文字识别"""
+        ocr = self.model_manager.get_model('ocr')
+        result = ocr.ocr(image, cls=True)
+
+        text_regions = []
+        for line in result[0]:
+            text_regions.append({
+                "text": line[1][0],
+                "confidence": line[1][1],
+                "bbox": line[0]
+            })
+
+        return text_regions
+
+    async def understand_scene(self, image: np.ndarray) -> SceneDescription:
+        """场景理解"""
+        # 使用多模态大模型生成场景描述
+        llm = self.model_manager.get_model('llm')
+
+        prompt = "请详细描述这张图片中的场景、物体和可能的上下文信息。"
+        description = llm.generate(image, prompt)
+
+        return {
+            "description": description,
+            "tags": extract_tags(description),
+            "sentiment": analyze_sentiment(description)
+        }
+```
+
+### 5. AR 显示模块
+
+#### 数据格式设计
+```json
+{
+  "type": "ar_overlay",
+  "timestamp": 1234567890,
+  "elements": [
+    {
+      "id": "obj_001",
+      "type": "bounding_box",
+      "position": {"x": 100, "y": 150, "width": 200, "height": 300},
+      "label": "水杯",
+      "confidence": 0.95,
+      "color": "#00FF00"
+    },
+    {
+      "id": "text_001",
+      "type": "text_overlay",
+      "position": {"x": 50, "y": 50},
+      "content": "前方有台阶，请小心",
+      "font_size": 24,
+      "color": "#FF0000",
+      "duration": 3000
+    },
+    {
+      "id": "arrow_001",
+      "type": "direction_arrow",
+      "start": {"x": 320, "y": 240},
+      "end": {"x": 400, "y": 240},
+      "label": "出口方向"
+    }
+  ]
+}
+```
+
+#### 眼镜端渲染
+```python
+class ARRenderer:
+    def __init__(self, display):
+        self.display = display
+        self.overlay_queue = queue.Queue()
+
+    def render_frame(self, camera_frame, ar_data):
+        """渲染 AR 叠加层"""
+        # 1. 绘制原始相机画面
+        frame = camera_frame.copy()
+
+        # 2. 绘制 AR 元素
+        for element in ar_data['elements']:
+            if element['type'] == 'bounding_box':
+                self.draw_bbox(frame, element)
+            elif element['type'] == 'text_overlay':
+                self.draw_text(frame, element)
+            elif element['type'] == 'direction_arrow':
+                self.draw_arrow(frame, element)
+
+        # 3. 显示到眼镜屏幕
+        self.display.show(frame)
+
+    def draw_bbox(self, frame, element):
+        """绘制边界框"""
+        pos = element['position']
+        cv2.rectangle(
+            frame,
+            (pos['x'], pos['y']),
+            (pos['x'] + pos['width'], pos['y'] + pos['height']),
+            self.hex_to_rgb(element['color']),
+            2
+        )
+        # 绘制标签
+        cv2.putText(
+            frame,
+            f"{element['label']} {element['confidence']:.2f}",
+            (pos['x'], pos['y'] - 10),
+            cv2.FONT_HERSHEY_SIMPLEX,
+            0.5,
+            self.hex_to_rgb(element['color']),
+            2
+        )
+```
+
+## 性能优化方案
+
+### 1. 图像传输优化
+- 使用 H.264 硬件编码
+- 动态调整分辨率和帧率
+- 实现智能跳帧机制
+- 使用 WiFi Direct 替代蓝牙（高带宽场景）
+
+### 2. AI 推理优化
+- 模型量化（INT8）
+- 批处理推理
+- 模型缓存和预热
+- GPU 并行计算
+- 使用 TensorRT 加速
+
+### 3. 网络优化
+- CDN 加速静态资源
+- 图像压缩和格式优化（WebP）
+- HTTP/2 多路复用
+- 请求合并和批处理
+- 智能重试机制
+
+### 4. 缓存策略
+```python
+# 多级缓存
+class CacheStrategy:
+    def __init__(self):
+        self.l1_cache = LRUCache(maxsize=100)  # 内存缓存
+        self.l2_cache = RedisCache()            # Redis 缓存
+        self.l3_cache = DatabaseCache()         # 数据库
+
+    async def get(self, key):
+        # L1 缓存
+        if key in self.l1_cache:
+            return self.l1_cache[key]
+
+        # L2 缓存
+        value = await self.l2_cache.get(key)
+        if value:
+            self.l1_cache[key] = value
+            return value
+
+        # L3 缓存
+        value = await self.l3_cache.get(key)
+        if value:
+            await self.l2_cache.set(key, value, ttl=3600)
+            self.l1_cache[key] = value
+
+        return value
+```
+
+## 安全方案
+
+### 1. 数据传输安全
+- TLS 1.3 加密
+- 证书固定（Certificate Pinning）
+- 请求签名验证
+
+### 2. 隐私保护
+- 图像本地处理优先
+- 敏感信息脱敏
+- 用户数据加密存储
+- 定期数据清理
+
+### 3. 访问控制
+- JWT 认证
+- OAuth 2.0 授权
+- API 限流
+- IP 白名单
+
+## 监控与运维
+
+### 1. 性能监控
+```python
+# 关键指标
+metrics = {
+    "image_upload_latency": Histogram(),
+    "ai_inference_time": Histogram(),
+    "api_response_time": Histogram(),
+    "error_rate": Counter(),
+    "active_users": Gauge()
+}
+```
+
+### 2. 日志系统
+- 结构化日志（JSON 格式）
+- 分级日志（DEBUG/INFO/WARN/ERROR）
+- 日志聚合和分析
+- 告警机制
+
+### 3. 容灾方案
+- 服务降级
+- 熔断机制
+- 限流保护
+- 数据备份