Contact Me!
As I discussed in a previous article, Microsoft Cognitive Services includes a set of APIs that allow your applications to take advantage of Machine Learning in order to analyze images, sound, video, and language. One of these APIs is a REST web service that can determine the words and punctuation contained in a picture. This is accomplished by a simple REST web service call.
The Cognitive Services Optical Character Recognition (OCR) service is part of the Computer Vision API. It takes as input a picture of text and returns the words found in the image.
To get started, you will need an Azure account and a Cognitive Services Vision API key.
If you don't have an Azure account, you can get a free one at https://azure.microsoft.com/free/.
Once you have an Azure Account, follow the instructions in this article to generate a Cognitive Services Computer Vision key.
To use this API, you simply have to make a POST request to the following URL: https://[location].api.cognitive.microsoft.com/vision/v1.0/ocr
where [location] is the Azure location where you created your API key (above).
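Optionally, you can add the following 2 querystring parameters to the URL: language (the language code of the text in the image, such as "en"; if omitted, the service uses "unk" and detects the language automatically) and detectOrientation (true or false, indicating whether the service should try to detect the orientation of the text in the image).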
The HTTP header of the request should include the following:
Ocp-Apim-Subscription-Key. The Cognitive Services Computer Vision key you generated above.
Content-Type
This tells the service how you will send the image. The options are:
If the image is accessible via a public URL, set the Content-Type to application/json and send JSON in the body of the HTTP request in the following format:
{"url":"imageurl"} where imageurl is a public URL pointing to the image. For example, to perform OCR on an image of an Edgar Allan Poe poem, submit the following JSON:
{"url": "http://media.tumblr.com/tumblr_lrbhs0RY2o1qaaiuh.png"}
If you plan to send the image itself to the web service, set the content type to either "application/octet-stream" or "multipart/form-data" and submit the binary image in the body of the HTTP request.
POST https://westus.api.cognitive.microsoft.com/vision/v1.0/ocr HTTP/1.1 Content-Type: application/json Host: westus.api.cognitive.microsoft.com Content-Length: 62 Ocp-Apim-Subscription-Key: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx { "url": "http://media.tumblr.com/tumblr_lrbhs0RY2o1qaaiuh.png" }
For example, passing a URL with the following picture:
(found online at http://media.tumblr.com/tumblr_lrbhs0RY2o1qaaiuh.png)
returned the following data:
{ "textAngle": 0.0, "orientation": "NotDetected", "language": "en", "regions": [ { "boundingBox": "31,6,435,478", "lines": [ { "boundingBox": "114,6,352,23", "words": [ { "boundingBox": "114,6,24,22", "text": "A" }, { "boundingBox": "144,6,93,23", "text": "Dream" }, { "boundingBox": "245,6,95,23", "text": "Within" }, { "boundingBox": "350,12,14,16", "text": "a" }, { "boundingBox": "373,6,93,23", "text": "Dream" } ] }, { "boundingBox": "31,50,187,16", "words": [ { "boundingBox": "31,50,31,12", "text": "Take" }, { "boundingBox": "66,50,23,12", "text": "this" }, { "boundingBox": "93,50,24,12", "text": "kiss" }, { "boundingBox": "121,54,33,12", "text": "upon" }, { "boundingBox": "158,50,19,12", "text": "the" }, { "boundingBox": "181,50,37,12", "text": "brow!" } ] }, { "boundingBox": "31,67,194,16", "words": [ { "boundingBox": "31,67,31,15", "text": "And," }, { "boundingBox": "67,67,12,12", "text": "in" }, { "boundingBox": "82,67,46,16", "text": "parting" }, { "boundingBox": "132,67,31,12", "text": "from" }, { "boundingBox": "167,71,25,12", "text": "you" }, { "boundingBox": "195,71,30,11", "text": "now," } ] }, { "boundingBox": "31,85,159,12", "words": [ { "boundingBox": "31,85,32,12", "text": "Thus" }, { "boundingBox": "67,85,35,12", "text": "much" }, { "boundingBox": "107,86,16,11", "text": "let" }, { "boundingBox": "126,89,20,8", "text": "me" }, { "boundingBox": "150,89,40,8", "text": "avow-" } ] }, { "boundingBox": "31,102,193,16", "words": [ { "boundingBox": "31,103,26,11", "text": "You" }, { "boundingBox": "61,106,19,8", "text": "are" }, { "boundingBox": "84,104,21,10", "text": "not" }, { "boundingBox": "109,106,44,12", "text": "wrong," }, { "boundingBox": "158,102,27,12", "text": "who" }, { "boundingBox": "189,102,35,12", "text": "deem" } ] }, { "boundingBox": "31,120,214,16", "words": [ { "boundingBox": "31,120,29,12", "text": "That" }, { "boundingBox": "64,124,21,12", "text": "my" }, { "boundingBox": "89,121,29,15", "text": "days" }, { "boundingBox": 
"122,120,30,12", "text": "have" }, { "boundingBox": "156,121,30,11", "text": "been" }, { "boundingBox": "191,124,7,8", "text": "a" }, { "boundingBox": "202,121,43,14", "text": "dream;" } ] }, { "boundingBox": "31,138,175,16", "words": [ { "boundingBox": "31,139,22,11", "text": "Yet" }, { "boundingBox": "57,138,11,12", "text": "if" }, { "boundingBox": "70,138,31,16", "text": "hope" }, { "boundingBox": "105,138,21,12", "text": "has" }, { "boundingBox": "131,138,37,12", "text": "flown" }, { "boundingBox": "172,142,34,12", "text": "away" } ] }, { "boundingBox": "31,155,140,16", "words": [ { "boundingBox": "31,156,13,11", "text": "In" }, { "boundingBox": "48,159,8,8", "text": "a" }, { "boundingBox": "59,155,37,16", "text": "night," }, { "boundingBox": "100,159,14,8", "text": "or" }, { "boundingBox": "118,155,12,12", "text": "in" }, { "boundingBox": "134,159,7,8", "text": "a" }, { "boundingBox": "145,155,26,16", "text": "day," } ] }, { "boundingBox": "31,173,144,15", "words": [ { "boundingBox": "31,174,13,11", "text": "In" }, { "boundingBox": "48,177,8,8", "text": "a" }, { "boundingBox": "59,173,43,15", "text": "vision," }, { "boundingBox": "107,177,13,8", "text": "or" }, { "boundingBox": "124,173,12,12", "text": "in" }, { "boundingBox": "140,177,35,11", "text": "none," } ] }, { "boundingBox": "31,190,180,16", "words": [ { "boundingBox": "31,191,11,11", "text": "Is" }, { "boundingBox": "47,190,8,12", "text": "it" }, { "boundingBox": "59,190,58,12", "text": "therefore" }, { "boundingBox": "121,190,19,12", "text": "the" }, { "boundingBox": "145,191,23,11", "text": "less" }, { "boundingBox": "173,191,38,15", "text": "gone?" 
} ] }, { "boundingBox": "31,208,150,12", "words": [ { "boundingBox": "31,208,20,12", "text": "All" }, { "boundingBox": "55,208,24,12", "text": "that" }, { "boundingBox": "83,212,19,8", "text": "we" }, { "boundingBox": "107,212,19,8", "text": "see" }, { "boundingBox": "131,212,13,8", "text": "or" }, { "boundingBox": "148,212,33,8", "text": "seem" } ] }, { "boundingBox": "31,226,194,12", "words": [ { "boundingBox": "31,227,11,11", "text": "Is" }, { "boundingBox": "46,226,21,12", "text": "but" }, { "boundingBox": "71,230,7,8", "text": "a" }, { "boundingBox": "82,226,40,12", "text": "dream" }, { "boundingBox": "126,226,41,12", "text": "within" }, { "boundingBox": "171,230,7,8", "text": "a" }, { "boundingBox": "182,226,43,12", "text": "dream." } ] }, { "boundingBox": "31,261,133,12", "words": [ { "boundingBox": "31,262,5,11", "text": "I" }, { "boundingBox": "41,261,33,12", "text": "stand" }, { "boundingBox": "78,261,32,12", "text": "amid" }, { "boundingBox": "114,261,19,12", "text": "the" }, { "boundingBox": "137,265,27,8", "text": "roar" } ] }, { "boundingBox": "31,278,169,15", "words": [ { "boundingBox": "31,278,18,12", "text": "Of" }, { "boundingBox": "52,282,7,8", "text": "a" }, { "boundingBox": "63,278,95,12", "text": "surf-tormented" }, { "boundingBox": "162,278,38,15", "text": "shore," } ] }, { "boundingBox": "31,296,174,15", "words": [ { "boundingBox": "31,296,28,12", "text": "And" }, { "boundingBox": "63,297,4,11", "text": "I" }, { "boundingBox": "72,296,28,12", "text": "hold" }, { "boundingBox": "104,296,41,12", "text": "within" }, { "boundingBox": "149,300,20,11", "text": "my" }, { "boundingBox": "173,296,32,12", "text": "hand" } ] }, { "boundingBox": "31,314,169,16", "words": [ { "boundingBox": "31,314,42,12", "text": "Grains" }, { "boundingBox": "78,314,15,12", "text": "of" }, { "boundingBox": "95,314,19,12", "text": "the" }, { "boundingBox": "119,315,43,15", "text": "golden" }, { "boundingBox": "167,314,33,12", "text": "sand-" } ] }, { "boundingBox": 
"31,331,189,16", "words": [ { "boundingBox": "31,332,31,11", "text": "How" }, { "boundingBox": "66,331,28,12", "text": "few!" }, { "boundingBox": "99,333,20,14", "text": "yet" }, { "boundingBox": "123,331,27,12", "text": "how" }, { "boundingBox": "154,331,28,16", "text": "they" }, { "boundingBox": "186,335,34,12", "text": "creep" } ] }, { "boundingBox": "31,349,206,16", "words": [ { "boundingBox": "31,349,55,16", "text": "Through" }, { "boundingBox": "90,353,20,11", "text": "my" }, { "boundingBox": "115,349,44,16", "text": "fingers" }, { "boundingBox": "163,351,12,10", "text": "to" }, { "boundingBox": "179,349,20,12", "text": "the" }, { "boundingBox": "203,350,34,15", "text": "deep," } ] }, { "boundingBox": "31,366,182,16", "words": [ { "boundingBox": "31,366,39,12", "text": "While" }, { "boundingBox": "74,367,5,11", "text": "I" }, { "boundingBox": "83,370,39,12", "text": "weep-" }, { "boundingBox": "126,366,36,12", "text": "while" }, { "boundingBox": "166,367,5,11", "text": "I" }, { "boundingBox": "175,367,38,15", "text": "weep!" } ] }, { "boundingBox": "31,384,147,16", "words": [ { "boundingBox": "31,385,11,11", "text": "O" }, { "boundingBox": "47,384,31,12", "text": "God!" }, { "boundingBox": "84,388,21,8", "text": "can" }, { "boundingBox": "110,385,4,11", "text": "I" }, { "boundingBox": "119,386,20,10", "text": "not" }, { "boundingBox": "144,388,34,12", "text": "grasp" } ] }, { "boundingBox": "31,402,170,16", "words": [ { "boundingBox": "31,402,37,12", "text": "Them" }, { "boundingBox": "72,402,29,12", "text": "with" }, { "boundingBox": "105,406,7,8", "text": "a" }, { "boundingBox": "116,402,42,16", "text": "tighter" }, { "boundingBox": "162,403,39,15", "text": "clasp?" } ] }, { "boundingBox": "31,419,141,12", "words": [ { "boundingBox": "31,420,11,11", "text": "O" }, { "boundingBox": "47,419,31,12", "text": "God!" 
}, { "boundingBox": "84,423,21,8", "text": "can" }, { "boundingBox": "110,420,4,11", "text": "I" }, { "boundingBox": "119,421,20,10", "text": "not" }, { "boundingBox": "144,423,28,8", "text": "save" } ] }, { "boundingBox": "31,437,179,16", "words": [ { "boundingBox": "31,438,26,11", "text": "One" }, { "boundingBox": "62,437,31,12", "text": "from" }, { "boundingBox": "97,437,19,12", "text": "the" }, { "boundingBox": "120,437,45,16", "text": "pitiless" }, { "boundingBox": "169,438,41,11", "text": "wave?" } ] }, { "boundingBox": "31,454,161,12", "words": [ { "boundingBox": "31,455,11,11", "text": "Is" }, { "boundingBox": "47,454,15,12", "text": "all" }, { "boundingBox": "66,454,25,12", "text": "that" }, { "boundingBox": "94,458,19,8", "text": "we" }, { "boundingBox": "118,458,19,8", "text": "see" }, { "boundingBox": "142,458,13,8", "text": "or" }, { "boundingBox": "159,458,33,8", "text": "seem" } ] }, { "boundingBox": "31,472,185,12", "words": [ { "boundingBox": "31,473,23,11", "text": "But" }, { "boundingBox": "58,476,7,8", "text": "a" }, { "boundingBox": "69,472,40,12", "text": "dream" }, { "boundingBox": "113,472,41,12", "text": "within" }, { "boundingBox": "158,476,7,8", "text": "a" }, { "boundingBox": "169,472,47,12", "text": "dream?" } ] } ] } ] }
Note that the image is split into an array of regions; each region contains an array of lines; and each line contains an array of words. This is done so that you can replace or block out one or more specific words, lines, or regions.
Below is a jQuery code snippet making a request to this service to perform OCR on images of text. You can download the full application at https://github.com/DavidGiard/CognitiveSvcsDemos.
var language = $("#LanguageDropdown").val(); var computerVisionKey = getKey() || "Copy your Subscription key here"; var webSvcUrl = "https://westcentralus.api.cognitive.microsoft.com/vision/v1.0/ocr"; webSvcUrl = webSvcUrl + "?language=" + language; $.ajax({ type: "POST", url: webSvcUrl, headers: { "Ocp-Apim-Subscription-Key": computerVisionKey }, contentType: "application/json", data: '{ "Url": "' + url + '" }' }).done(function (data) { outputDiv.text(""); var regionsOfText = data.regions; for (var h = 0; h < regionsOfText.length; h++) { var linesOfText = data.regions[h].lines; for (var i = 0; i < linesOfText.length; i++) { var output = ""; var thisLine = linesOfText[i]; var words = thisLine.words; for (var j = 0; j < words.length; j++) { var thisWord = words[j]; output += thisWord.text; output += " "; } var newDiv = "<div>" + output + "</div>"; outputDiv.append(newDiv); } outputDiv.append("<hr>"); } }).fail(function (err) { $("#OutputDiv").text("ERROR!" + err.responseText); });
var language = $("#LanguageDropdown").val(); var computerVisionKey = getKey() || "Copy your Subscription key here"; var webSvcUrl = "https://westcentralus.api.cognitive.microsoft.com/vision/v1.0/ocr"; webSvcUrl = webSvcUrl + "?language=" + language; $.ajax({ type: "POST", url: webSvcUrl, headers: { "Ocp-Apim-Subscription-Key": computerVisionKey }, contentType: "application/json", data: '{ "Url": "' + url + '" }' }).done(function (data) { outputDiv.text("");
var regionsOfText = data.regions; for (var h = 0; h < regionsOfText.length; h++) { var linesOfText = data.regions[h].lines; for (var i = 0; i < linesOfText.length; i++) { var output = "";
var thisLine = linesOfText[i]; var words = thisLine.words; for (var j = 0; j < words.length; j++) { var thisWord = words[j]; output += thisWord.text; output += " ";
} var newDiv = "<div>" + output + "</div>"; outputDiv.append(newDiv);
} outputDiv.append("<hr>"); } }).fail(function (err) { $("#OutputDiv").text("ERROR!" + err.responseText); });
You can find the full documentation – including an in-browser testing tool – for this API here.
Sending requests to the Cognitive Services OCR API makes it simple to convert a picture of text into text.