如何使用自定义视觉API读取表格格式的数据图像并将其存储到csv文件中?

问题描述 投票:0回答:1

我已经编写了Python代码,用于从图像中读取文本并将其存储到文本文件中。对于普通文本,我可以正常识别;但对于图像中的表格格式数据,我很难将其完整识别并存储到csv文件中:目前只能读取到其中一列,其他列在输出中都丢失了。是否可以通过自定义视觉(Custom Vision)API实现这一需求?(原帖此处附有一张表格图片:enter image description here)

azure azure-cognitive-services microsoft-custom-vision
1个回答
0
投票

Sam,这并不是Custom Vision的用途,它只提供2种功能:图像分类和对象检测。

您这里是想解析图像中的表格,针对这种场景有一个名为Form Recognizer API的产品:https://azure.microsoft.com/en-us/services/cognitive-services/form-recognizer/#features

请看页面中的预览样本:Form recognizer sample

我通过Form Recognizer的Analyze Layout操作,用您的图像做了一次快速测试,输出如下:

{
  "status": "succeeded",
  "createdDateTime": "2020-04-16T17:31:52Z",
  "lastUpdatedDateTime": "2020-04-16T17:31:58Z",
  "analyzeResult": {
    "version": "2.0.0",
    "readResults": [{
      "page": 1,
      "language": "en",
      "angle": 0,
      "width": 467,
      "height": 113,
      "unit": "pixel",
      "lines": [{
        "language": "en",
        "boundingBox": [4, 6, 17, 5, 17, 15, 4, 16],
        "text": "#",
        "words": [{
          "boundingBox": [6, 6, 13, 5, 14, 15, 7, 16],
          "text": "#",
          "confidence": 0.875
        }]
      }, {
        "language": "en",
        "boundingBox": [26, 6, 49, 6, 49, 15, 26, 15],
        "text": "Item",
        "words": [{
          "boundingBox": [27, 6, 47, 6, 47, 15, 27, 15],
          "text": "Item",
          "confidence": 0.683
        }]
      }, {
        "language": "en",
        "boundingBox": [273, 5, 315, 4, 315, 16, 273, 16],
        "text": "QTY/HR",
        "words": [{
          "boundingBox": [274, 5, 313, 5, 314, 16, 274, 16],
          "text": "QTY/HR",
          "confidence": 0.947
        }]
      }, {
        "language": "en",
        "boundingBox": [330, 5, 386, 6, 386, 17, 330, 16],
        "text": "Unit price",
        "words": [{
          "boundingBox": [334, 6, 356, 6, 357, 17, 334, 17],
          "text": "Unit",
          "confidence": 0.959
        }, {
          "boundingBox": [359, 6, 385, 6, 385, 18, 359, 17],
          "text": "price",
          "confidence": 0.959
        }]
      }, {
        "language": "en",
        "boundingBox": [419, 6, 461, 6, 461, 16, 419, 16],
        "text": "Amount",
        "words": [{
          "boundingBox": [420, 6, 461, 7, 461, 17, 420, 17],
          "text": "Amount",
          "confidence": 0.959
        }]
      }, {
        "language": "en",
        "boundingBox": [23, 20, 182, 20, 182, 32, 23, 32],
        "text": "Installed office furniture (hours)",
        "words": [{
          "boundingBox": [26, 21, 68, 21, 68, 32, 26, 32],
          "text": "Installed",
          "confidence": 0.862
        }, {
          "boundingBox": [70, 21, 98, 21, 98, 32, 70, 32],
          "text": "office",
          "confidence": 0.958
        }, {
          "boundingBox": [100, 21, 145, 21, 145, 32, 100, 32],
          "text": "furniture",
          "confidence": 0.958
        }, {
          "boundingBox": [147, 21, 182, 21, 183, 33, 147, 32],
          "text": "(hours)",
          "confidence": 0.914
        }]
      }, {
        "language": "en",
        "boundingBox": [305, 22, 314, 22, 313, 31, 304, 32],
        "text": "3",
        "words": [{
          "boundingBox": [308, 22, 313, 22, 314, 31, 308, 32],
          "text": "3",
          "confidence": 0.891
        }]
      }, {
        "language": "en",
        "boundingBox": [364, 21, 384, 21, 385, 30, 364, 31],
        "text": "150",
        "words": [{
          "boundingBox": [366, 21, 384, 21, 385, 31, 366, 31],
          "text": "150",
          "confidence": 0.958
        }]
      }, {
        "language": "en",
        "boundingBox": [443, 21, 459, 21, 459, 30, 443, 31],
        "text": "450",
        "words": [{
          "boundingBox": [443, 21, 459, 21, 459, 30, 443, 31],
          "text": "450",
          "confidence": 0.694
        }]
      }, {
        "language": "en",
        "boundingBox": [4, 37, 15, 37, 14, 47, 4, 47],
        "text": "2",
        "words": [{
          "boundingBox": [7, 37, 14, 37, 14, 47, 7, 47],
          "text": "2",
          "confidence": 0.891
        }]
      }, {
        "language": "en",
        "boundingBox": [26, 36, 131, 37, 131, 48, 26, 47],
        "text": "Herman Miller Aeron",
        "words": [{
          "boundingBox": [27, 37, 66, 37, 66, 47, 27, 48],
          "text": "Herman",
          "confidence": 0.959
        }, {
          "boundingBox": [69, 37, 99, 37, 99, 48, 69, 47],
          "text": "Miller",
          "confidence": 0.959
        }, {
          "boundingBox": [101, 37, 131, 38, 130, 48, 101, 48],
          "text": "Aeron",
          "confidence": 0.958
        }]
      }, {
        "language": "en",
        "boundingBox": [307, 37, 316, 38, 314, 48, 306, 48],
        "text": "4",
        "words": [{
          "boundingBox": [308, 37, 315, 37, 315, 48, 307, 47],
          "text": "4",
          "confidence": 0.895
        }]
      }, {
        "language": "en",
        "boundingBox": [366, 37, 384, 37, 384, 47, 366, 46],
        "text": "900",
        "words": [{
          "boundingBox": [366, 37, 384, 37, 384, 47, 366, 46],
          "text": "900",
          "confidence": 0.950
        }]
      }, {
        "language": "en",
        "boundingBox": [436, 37, 460, 36, 460, 46, 436, 47],
        "text": "3600",
        "words": [{
          "boundingBox": [436, 37, 460, 36, 460, 46, 436, 47],
          "text": "3600",
          "confidence": 0.890
        }]
      }, {
        "language": "en",
        "boundingBox": [26, 52, 100, 53, 100, 63, 26, 62],
        "text": "Sonos speakers",
        "words": [{
          "boundingBox": [27, 53, 56, 53, 56, 62, 27, 62],
          "text": "Sonos",
          "confidence": 0.959
        }, {
          "boundingBox": [58, 53, 100, 54, 100, 63, 58, 62],
          "text": "speakers",
          "confidence": 0.959
        }]
      }, {
        "language": "en",
        "boundingBox": [304, 52, 316, 52, 315, 62, 303, 62],
        "text": "3",
        "words": [{
          "boundingBox": [307, 52, 314, 52, 314, 62, 307, 62],
          "text": "3",
          "confidence": 0.886
        }]
      }, {
        "language": "en",
        "boundingBox": [365, 51, 385, 51, 384, 62, 365, 62],
        "text": "320",
        "words": [{
          "boundingBox": [365, 51, 385, 51, 385, 62, 365, 62],
          "text": "320",
          "confidence": 0.928
        }]
      }, {
        "language": "en",
        "boundingBox": [444, 52, 455, 52, 455, 61, 444, 61],
        "text": "96",
        "words": [{
          "boundingBox": [444, 52, 454, 52, 454, 61, 444, 61],
          "text": "96",
          "confidence": 0.570
        }]
      }, {
        "language": "en",
        "boundingBox": [27, 67, 138, 67, 138, 79, 27, 79],
        "text": "Giardino Grande Table",
        "words": [{
          "boundingBox": [28, 68, 69, 68, 69, 80, 28, 79],
          "text": "Giardino",
          "confidence": 0.861
        }, {
          "boundingBox": [71, 68, 109, 68, 109, 80, 71, 80],
          "text": "Grande",
          "confidence": 0.959
        }, {
          "boundingBox": [111, 68, 138, 67, 137, 80, 111, 80],
          "text": "Table",
          "confidence": 0.958
        }]
      }, {
        "language": "en",
        "boundingBox": [303, 68, 315, 66, 317, 76, 305, 78],
        "text": "1",
        "words": [{
          "boundingBox": [308, 67, 314, 66, 316, 76, 309, 77],
          "text": "1",
          "confidence": 0.839
        }]
      }, {
        "language": "en",
        "boundingBox": [366, 67, 383, 68, 383, 77, 366, 77],
        "text": "780",
        "words": [{
          "boundingBox": [366, 67, 383, 67, 382, 77, 366, 76],
          "text": "780",
          "confidence": 0.909
        }]
      }, {
        "language": "en",
        "boundingBox": [442, 68, 460, 67, 460, 77, 442, 77],
        "text": "780",
        "words": [{
          "boundingBox": [442, 67, 460, 67, 460, 76, 442, 77],
          "text": "780",
          "confidence": 0.958
        }]
      }]
    }],
    "pageResults": [{
      "page": 1,
      "tables": [{
        "rows": 4,
        "columns": 4,
        "cells": [{
          "rowIndex": 0,
          "columnIndex": 1,
          "text": "Installed office furniture (hours)",
          "boundingBox": [26, 21, 274, 21, 274, 34, 26, 34],
          "elements": ["#/readResults/0/lines/5/words/0", "#/readResults/0/lines/5/words/1", "#/readResults/0/lines/5/words/2", "#/readResults/0/lines/5/words/3"]
        }, {
          "rowIndex": 0,
          "columnIndex": 2,
          "text": "3",
          "boundingBox": [274, 21, 334, 21, 334, 34, 274, 34],
          "elements": ["#/readResults/0/lines/6/words/0"]
        }, {
          "rowIndex": 0,
          "columnIndex": 3,
          "text": "150",
          "boundingBox": [334, 21, 385, 21, 385, 34, 334, 34],
          "elements": ["#/readResults/0/lines/7/words/0"]
        }, {
          "rowIndex": 1,
          "columnIndex": 0,
          "text": "2",
          "boundingBox": [7, 34, 26, 34, 26, 50, 7, 50],
          "elements": ["#/readResults/0/lines/9/words/0"]
        }, {
          "rowIndex": 1,
          "columnIndex": 1,
          "text": "Herman Miller Aeron",
          "boundingBox": [26, 34, 274, 34, 274, 50, 26, 50],
          "elements": ["#/readResults/0/lines/10/words/0", "#/readResults/0/lines/10/words/1", "#/readResults/0/lines/10/words/2"]
        }, {
          "rowIndex": 1,
          "columnIndex": 2,
          "text": "4",
          "boundingBox": [274, 34, 334, 34, 334, 50, 274, 50],
          "elements": ["#/readResults/0/lines/11/words/0"]
        }, {
          "rowIndex": 1,
          "columnIndex": 3,
          "text": "900",
          "boundingBox": [334, 34, 385, 34, 385, 50, 334, 50],
          "elements": ["#/readResults/0/lines/12/words/0"]
        }, {
          "rowIndex": 2,
          "columnIndex": 1,
          "text": "Sonos speakers",
          "boundingBox": [26, 50, 274, 50, 274, 65, 26, 65],
          "elements": ["#/readResults/0/lines/14/words/0", "#/readResults/0/lines/14/words/1"]
        }, {
          "rowIndex": 2,
          "columnIndex": 2,
          "text": "3",
          "boundingBox": [274, 50, 334, 50, 334, 65, 274, 65],
          "elements": ["#/readResults/0/lines/15/words/0"]
        }, {
          "rowIndex": 2,
          "columnIndex": 3,
          "text": "320",
          "boundingBox": [334, 50, 385, 50, 385, 65, 334, 65],
          "elements": ["#/readResults/0/lines/16/words/0"]
        }, {
          "rowIndex": 3,
          "columnIndex": 1,
          "text": "Giardino Grande Table",
          "boundingBox": [26, 65, 274, 65, 274, 79, 26, 79],
          "elements": ["#/readResults/0/lines/18/words/0", "#/readResults/0/lines/18/words/1", "#/readResults/0/lines/18/words/2"]
        }, {
          "rowIndex": 3,
          "columnIndex": 2,
          "text": "1",
          "boundingBox": [274, 65, 334, 65, 334, 79, 274, 79],
          "elements": ["#/readResults/0/lines/19/words/0"]
        }, {
          "rowIndex": 3,
          "columnIndex": 3,
          "text": "780",
          "boundingBox": [334, 65, 385, 65, 385, 79, 334, 79],
          "elements": ["#/readResults/0/lines/20/words/0"]
        }]
      }]
    }]
  }
}

您可以看到,输出的pageResults中包含tables数组,其中每个单元格(cells)都带有rowIndex、columnIndex和text等详细信息,这应该正是您需要的:按行号和列号整理这些单元格,就可以写入csv文件。

© www.soinside.com 2019 - 2024. All rights reserved.