Merge pull request #18064 from akineeic:gsoc_2020_dnn

[GSoC] Develop OpenCV.js DNN modules for promising web use cases together with their tutorials

* [Opencv.js doc] Init commit to add image classification example in opencv.js tutorial

* [Opencv.js doc] Make the code snippet interactive and put the functions into code snippet.

* Fix the utils.loadOpenCv for promise module

* [Opencv.js doc] Code modify and fixed layout issue.

* [Opencv.js doc] Add a JSON file to store parameters for models and show in the web page.

* [Opencv.js doc] Change let to const.

* [Opencv.js doc] Init commit to add image classification example with camera in opencv.js tutorial

* [Opencv.js doc] Init commit to add semantic segmentation example in opencv.js tutorial

* [Opencv.js doc] Add object detection example, support YOLOv2

* [Opencv.js doc] Support SSD model for object detection example

* [Opencv.js doc] Add fast neural style transfer example with opencv.js

* [Opencv.js doc] Add pose estimation example in opencv.js tutorial

* Delete whitespace for code check

* [Opencv.js doc] Add object detection example with camera

* [Opencv.js doc] Add json files containing model information to each example

* [Opencv.js doc] Add a js file for common function in dnn example

* [Opencv.js doc] Create single function getBlobFromImage

* [Opencv.js doc] Add url of model into webpage

* [OpenCV.js doc] Update UI for running

* [Opencv.js doc] Load dnn model by input button

* [Opencv.js doc] Fix some UI issues

* [Opencv.js doc] Change code format

Co-authored-by: Ningxin Hu <>
Zhiming-Zeng 4 years ago committed by GitHub
parent 7c78c59e64
commit 4e4458416d
No known key found for this signature in database
  1. 119
  2. 263
  3. 65
  4. 281
  5. 387
  6. 39
  7. 402
  8. 327
  9. 34
  10. 243
  11. 12
  12. 228
  13. 76
  14. 10
  15. 13
  16. 15
  17. 13
  18. 13
  19. 13
  20. 13
  21. 13
  22. 30
  23. 4

@ -0,0 +1,119 @@
/**
 * Build a network input blob from a canvas or from an existing cv.Mat.
 *
 * Declared with `var` so the name stays a reassignable global: the tutorial
 * pages print this function with toString() into an editable text area.
 *
 * @param {number[]} inputSize - [width, height] passed to cv.Size.
 * @param {number[]} mean - Three values passed to cv.Scalar as the mean.
 * @param {number} std - Passed to cv.blobFromImage as its scale factor argument.
 * @param {boolean} swapRB - Passed through to cv.blobFromImage.
 * @param {string|Object} image - A string is handed to cv.imread; any other
 *     value is used directly as the source Mat and stays owned by the caller.
 * @returns {Object} The Mat returned by cv.blobFromImage; the caller deletes it.
 */
var getBlobFromImage = function(inputSize, mean, std, swapRB, image) {
    // Only a Mat created here (from cv.imread) may be released here.
    const ownsMat = typeof image === 'string';
    const mat = ownsMat ? cv.imread(image) : image;

    let matC3 = null;
    try {
        matC3 = new cv.Mat(mat.matSize[0], mat.matSize[1], cv.CV_8UC3);
        cv.cvtColor(mat, matC3, cv.COLOR_RGBA2BGR);
        return cv.blobFromImage(matC3, std, new cv.Size(inputSize[0], inputSize[1]),
                                new cv.Scalar(mean[0], mean[1], mean[2]), swapRB);
    } finally {
        // Release the temporaries even when a cv call throws.
        if (matC3 !== null) {
            matC3.delete();
        }
        if (ownsMat) {
            mat.delete();
        }
    }
};
/**
 * Download a label file and split it into one label per line.
 *
 * Declared with `var` so the name stays a reassignable global: the tutorial
 * pages print this function with toString() into an editable text area.
 *
 * @param {string} labelsUrl - URL passed to fetch().
 * @returns {Promise<string[]>} The lines of the file, in order.
 * @throws {Error} When the HTTP response is not OK.
 */
var loadLables = async function(labelsUrl) {
    const response = await fetch(labelsUrl);
    if (!response.ok) {
        // Without this check an error page would be parsed as labels.
        throw new Error(`Failed to load labels from '${labelsUrl}': HTTP ${response.status}`);
    }
    const text = await response.text();
    // Accept both LF and CRLF files so labels carry no trailing '\r'.
    return text.split(/\r?\n/);
};
/**
 * Read the file chosen in a file <input> and store it in the Emscripten file
 * system root so OpenCV.js can open it by name.
 *
 * Declared with `var` so the name stays a reassignable global: the tutorial
 * pages print this function with toString() into an editable text area.
 *
 * @param {Event} e - 'change' event of a file input; e.target.files[0] is read.
 * @returns {Promise<string>} Resolves with the file name used as the path.
 *     Rejects when no file is selected, the read fails, or
 *     cv.FS_createDataFile throws.
 */
var loadModel = async function(e) {
    return new Promise((resolve, reject) => {
        const file = e.target.files[0];
        if (!file) {
            reject(new Error('No file selected.'));
            return;
        }
        const path = file.name;
        const reader = new FileReader();
        reader.onload = function(ev) {
            if (reader.readyState === 2) {
                try {
                    const data = new Uint8Array(reader.result);
                    cv.FS_createDataFile('/', path, data, true, false, false);
                    resolve(path);
                } catch (err) {
                    reject(err);
                }
            }
        };
        reader.onerror = function() {
            reject(reader.error);
        };
        // Start the read only after both handlers are attached.
        reader.readAsArrayBuffer(file);
    });
};
/**
 * Pick the highest-scoring classes from a score vector.
 *
 * Declared with `var` so the name stays a reassignable global: the tutorial
 * pages declare `var getTopClasses` themselves and re-evaluate the editable
 * snippet text.
 *
 * @param {ArrayLike<number>} probs - One score per class.
 * @param {string[]} labels - Label for each class index.
 * @param {number} [topK=3] - Maximum number of classes to return.
 * @returns {{label: string, prob: string}[]} Classes ordered from highest to
 *     lowest score; `prob` is the score times 100, formatted with two decimals.
 */
var getTopClasses = function(probs, labels, topK = 3) {
    // Pair each score with its class index before sorting, highest first.
    const ranked = Array.from(probs)
        .map((prob, index) => [prob, index])
        .sort((a, b) => b[0] - a[0]);

    // Never read past the end when there are fewer classes than topK.
    return ranked.slice(0, Math.max(0, topK)).map(([prob, index]) => ({
        label: labels[index],
        prob: (prob * 100).toFixed(2)
    }));
};
/**
 * Draw the image chosen in a file <input> onto a canvas, stretched to the
 * canvas size.
 *
 * Declared with `var` so the name stays a global, as in the original
 * undeclared assignment.
 *
 * @param {Event} e - 'change' event of a file input; e.target.files[0] is used.
 * @param {string} canvasId - id of the destination canvas element.
 */
var loadImageToCanvas = function(e, canvasId) {
    const files = e.target.files;
    if (!files || files.length === 0) {
        // Nothing was selected; leave the canvas as it is.
        return;
    }
    const imgUrl = URL.createObjectURL(files[0]);
    const canvas = document.getElementById(canvasId);
    const ctx = canvas.getContext('2d');
    const img = new Image();
    img.crossOrigin = 'anonymous';
    img.onload = function() {
        ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
        // The object URL is no longer needed once the pixels are drawn.
        URL.revokeObjectURL(imgUrl);
    };
    img.onerror = function() {
        URL.revokeObjectURL(imgUrl);
    };
    // Assign src last so the handlers are in place before loading starts.
    img.src = imgUrl;
};
/**
 * Fetch a JSON description of models and render it as one heading plus one
 * table per top-level key, appended to the given container.
 *
 * The JSON is read as an object whose values are arrays of model records.
 * Column headers come from the keys of the first record of each array.
 * The "modelUrl", "configUrl" and "labelsUrl" fields are rendered as links.
 *
 * Declared with `var` so the name stays a global, as in the original
 * undeclared assignment.
 *
 * @param {string} jsonUrl - URL passed to fetch().
 * @param {string} divId - id of the element that receives the tables.
 * @returns {Promise<void>}
 * @throws {Error} When the HTTP response is not OK.
 */
var drawInfoTable = async function(jsonUrl, divId) {
    const response = await fetch(jsonUrl);
    if (!response.ok) {
        throw new Error(`Failed to load model info from '${jsonUrl}': HTTP ${response.status}`);
    }
    const json = await response.json();
    const linkColumns = ["modelUrl", "configUrl", "labelsUrl"];

    const appendix = document.getElementById(divId);
    // Loop variables are block-scoped so they do not leak as globals.
    for (const key of Object.keys(json)) {
        const models = json[key];

        const h3 = document.createElement('h3');
        h3.textContent = key + " model";
        appendix.appendChild(h3);

        const table = document.createElement('table');
        const headTr = document.createElement('tr');
        // An empty model list simply yields an empty header row.
        const columns = models.length > 0 ? Object.keys(models[0]) : [];
        for (const head of columns) {
            const th = document.createElement('th');
            th.textContent = head;
            th.style.border = "1px solid black";
            headTr.appendChild(th);
        }
        table.appendChild(headTr);

        for (const model of models) {
            const tr = document.createElement('tr');
            for (const param of Object.keys(model)) {
                const td = document.createElement('td');
                td.style.border = "1px solid black";
                if (linkColumns.includes(param)) {
                    const a = document.createElement('a');
                    a.textContent = 'link';
                    a.href = model[param];
                    td.appendChild(a);
                } else {
                    td.textContent = model[param];
                }
                tr.appendChild(td);
            }
            table.appendChild(tr);
        }

        table.style.width = "800px";
        table.style.borderCollapse = "collapse";
        appendix.appendChild(table);
    }
};

@ -0,0 +1,263 @@
<!DOCTYPE html>
<meta charset="utf-8">
<title>Image Classification Example</title>
<link href="js_example_style.css" rel="stylesheet" type="text/css" />
<h2>Image Classification Example</h2>
This tutorial shows you how to write an image classification example with OpenCV.js.<br>
To try the example, click the <b>modelFile</b> button (and the <b>configFile</b> button if needed) to upload the inference model.
You can find the model URLs and parameters in the <a href="#appendix">model info</a> section.
Then change the parameters in the first code snippet according to the uploaded model.
Finally, click the <b>Try it</b> button to see the result. You can also choose any other image.<br>
<div class="control"><button id="tryIt" disabled>Try it</button></div>
<table cellpadding="0" cellspacing="0" width="0" border="0">
<canvas id="canvasInput" width="400" height="400"></canvas>
<table style="visibility: hidden;" id="result">
<th scope="col">#</th>
<th scope="col" width=300>Label</th>
<th scope="col">Probability</th>
<th scope="row">1</th>
<td id="label0" align="center"></td>
<td id="prob0" align="center"></td>
<th scope="row">2</th>
<td id="label1" align="center"></td>
<td id="prob1" align="center"></td>
<th scope="row">3</th>
<td id="label2" align="center"></td>
<td id="prob2" align="center"></td>
<p id='status' align="left"></p>
<div class="caption">
canvasInput <input type="file" id="fileInput" name="file" accept="image/*">
<div class="caption">
modelFile <input type="file" id="modelFile">
<div class="caption">
configFile <input type="file" id="configFile">
<p class="err" id="errorMessage"></p>
<h3>Help function</h3>
<p>1.The parameters for model inference which you can modify to investigate more models.</p>
<textarea class="code" rows="13" cols="100" id="codeEditor" spellcheck="false"></textarea>
<p>2.Main loop in which will read the image from canvas and do inference once.</p>
<textarea class="code" rows="17" cols="100" id="codeEditor1" spellcheck="false"></textarea>
<p>3.Load labels from txt file and process it into an array.</p>
<textarea class="code" rows="7" cols="100" id="codeEditor2" spellcheck="false"></textarea>
<p>4.Get blob from image as input for net, and standardize it with <b>mean</b> and <b>std</b>.</p>
<textarea class="code" rows="17" cols="100" id="codeEditor3" spellcheck="false"></textarea>
<p>5.Fetch model file and save to emscripten file system once click the input button.</p>
<textarea class="code" rows="17" cols="100" id="codeEditor4" spellcheck="false"></textarea>
<p>6.The post-processing, including softmax if needed and get the top classes from the output vector.</p>
<textarea class="code" rows="35" cols="100" id="codeEditor5" spellcheck="false"></textarea>
<div id="appendix">
<h2>Model Info:</h2>
<script src="utils.js" type="text/javascript"></script>
<script src="js_dnn_example_helper.js" type="text/javascript"></script>
<script id="codeSnippet" type="text/code-snippet">
inputSize = [224,224];
mean = [104, 117, 123];
std = 1;
swapRB = false;
// record if need softmax function for post-processing
needSoftmax = false;
// url for label file, can from local or Internet
labelsUrl = "";
<script id="codeSnippet1" type="text/code-snippet">
// Run one classification pass on the image shown in 'canvasInput' and
// display the top classes together with the inference time.
// `main` is assigned, not declared: the page declares it and re-evaluates
// this snippet each time it is edited.
main = async function() {
    const labels = await loadLables(labelsUrl);
    const input = getBlobFromImage(inputSize, mean, std, swapRB, 'canvasInput');
    const net = cv.readNet(configPath, modelPath);
    let result = null;
    try {
        net.setInput(input);
        // Time only the forward pass.
        const start = performance.now();
        result = net.forward();
        const time = performance.now() - start;
        const probs = softmax(result);
        const classes = getTopClasses(probs, labels);

        updateResult(classes, time);
    } finally {
        // OpenCV.js objects live on the WebAssembly heap and need explicit release.
        input.delete();
        net.delete();
        if (result !== null) {
            result.delete();
        }
    }
}
<script id="codeSnippet5" type="text/code-snippet">
// Turn the raw network output into probabilities.
// Returns result.data32F unchanged when needSoftmax is false; otherwise
// returns a new array holding the softmax of those values.
// `softmax` is assigned, not declared: the page declares it and re-evaluates
// this snippet each time it is edited.
softmax = function(result) {
    const arr = result.data32F;
    if (!needSoftmax) {
        return arr;
    }
    // Subtract the maximum before exponentiating for numerical stability.
    const maxNum = arr.reduce((a, b) => Math.max(a, b), -Infinity);
    // Compute each exponential once and reuse it for the sum and the ratios.
    const exps = Array.from(arr, (value) => Math.exp(value - maxNum));
    const expSum = exps.reduce((a, b) => a + b, 0);
    return exps.map((value) => value / expSum);
}
<script type="text/javascript">
let jsonUrl = "js_image_classification_model_info.json";
drawInfoTable(jsonUrl, 'appendix');
let utils = new Utils('errorMessage');
utils.loadCode('codeSnippet', 'codeEditor');
utils.loadCode('codeSnippet1', 'codeEditor1');
let loadLablesCode = 'loadLables = ' + loadLables.toString();
document.getElementById('codeEditor2').value = loadLablesCode;
let getBlobFromImageCode = 'getBlobFromImage = ' + getBlobFromImage.toString();
document.getElementById('codeEditor3').value = getBlobFromImageCode;
let loadModelCode = 'loadModel = ' + loadModel.toString();
document.getElementById('codeEditor4').value = loadModelCode;
utils.loadCode('codeSnippet5', 'codeEditor5');
let getTopClassesCode = 'getTopClasses = ' + getTopClasses.toString();
document.getElementById('codeEditor5').value += '\n' + '\n' + getTopClassesCode;
let canvas = document.getElementById('canvasInput');
let ctx = canvas.getContext('2d');
let img = new Image();
img.crossOrigin = 'anonymous';
img.src = 'space_shuttle.jpg';
img.onload = function() {
ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
let tryIt = document.getElementById('tryIt');
tryIt.addEventListener('click', () => {
document.getElementById('status').innerHTML = 'Running function main()...';
if (modelPath === "") {
document.getElementById('status').innerHTML = 'Runing failed.';
utils.printError('Please upload model file by clicking the button first.');
} else {
setTimeout(main, 1);
let fileInput = document.getElementById('fileInput');
fileInput.addEventListener('change', (e) => {
loadImageToCanvas(e, 'canvasInput');
let configPath = "";
let configFile = document.getElementById('configFile');
configFile.addEventListener('change', async (e) => {
configPath = await loadModel(e);
document.getElementById('status').innerHTML = `The config file '${configPath}' is created successfully.`;
let modelPath = "";
let modelFile = document.getElementById('modelFile');
modelFile.addEventListener('change', async (e) => {
modelPath = await loadModel(e);
document.getElementById('status').innerHTML = `The model file '${modelPath}' is created successfully.`;
configPath = "";
configFile.value = "";
utils.loadOpenCv(() => {
var main = async function() {};
var softmax = function(result){};
var getTopClasses = function(mat, labels, topK = 3){};
function updateResult(classes, time) {
classes.forEach((c,i) => {
let labelElement = document.getElementById('label'+i);
let probElement = document.getElementById('prob'+i);
labelElement.innerHTML = c.label;
probElement.innerHTML = c.prob + '%';
let result = document.getElementById('result'); = 'visible';
document.getElementById('status').innerHTML = `<b>Model:</b> ${modelPath}<br>
<b>Inference time:</b> ${time.toFixed(2)} ms`;
} catch(e) {
function initStatus() {
document.getElementById('status').innerHTML = '';
document.getElementById('result').style.visibility = 'hidden';

@ -0,0 +1,65 @@
"caffe": [
"model": "alexnet",
"mean": "104, 117, 123",
"std": "1",
"swapRB": "false",
"needSoftmax": "false",
"labelsUrl": "",
"modelUrl": "",
"configUrl": ""
"model": "densenet",
"mean": "127.5, 127.5, 127.5",
"std": "0.007843",
"swapRB": "false",
"needSoftmax": "true",
"labelsUrl": "",
"modelUrl": "",
"configUrl": ""
"model": "googlenet",
"mean": "104, 117, 123",
"std": "1",
"swapRB": "false",
"needSoftmax": "false",
"labelsUrl": "",
"modelUrl": "",
"configUrl": ""
"model": "squeezenet",
"mean": "104, 117, 123",
"std": "1",
"swapRB": "false",
"needSoftmax": "false",
"labelsUrl": "",
"modelUrl": "",
"configUrl": ""
"model": "VGG",
"mean": "104, 117, 123",
"std": "1",
"swapRB": "false",
"needSoftmax": "false",
"labelsUrl": "",
"modelUrl": "",
"configUrl": ""
"tensorflow": [
"model": "inception",
"mean": "123, 117, 104",
"std": "1",
"swapRB": "true",
"needSoftmax": "false",
"labelsUrl": "",
"modelUrl": ""

@ -0,0 +1,281 @@
<!DOCTYPE html>
<meta charset="utf-8">
<title>Image Classification Example with Camera</title>
<link href="js_example_style.css" rel="stylesheet" type="text/css" />
<h2>Image Classification Example with Camera</h2>
This tutorial shows you how to write an image classification example with camera.<br>
To try the example, click the <b>modelFile</b> button (and the <b>configFile</b> button if needed) to upload the inference model.
You can find the model URLs and parameters in the <a href="#appendix">model info</a> section.
Then change the parameters in the first code snippet according to the uploaded model.
Finally, click the <b>Start/Stop</b> button to start or stop the camera capture.<br>
<div class="control"><button id="startAndStop" disabled>Start</button></div>
<table cellpadding="0" cellspacing="0" width="0" border="0">
<video id="videoInput" width="400" height="400"></video>
<table style="visibility: hidden;" id="result">
<th scope="col">#</th>
<th scope="col" width=300>Label</th>
<th scope="col">Probability</th>
<th scope="row">1</th>
<td id="label0" align="center"></td>
<td id="prob0" align="center"></td>
<th scope="row">2</th>
<td id="label1" align="center"></td>
<td id="prob1" align="center"></td>
<th scope="row">3</th>
<td id="label2" align="center"></td>
<td id="prob2" align="center"></td>
<p id='status' align="left"></p>
<div class="caption">
<div class="caption">
modelFile <input type="file" id="modelFile">
<div class="caption">
configFile <input type="file" id="configFile">
<p class="err" id="errorMessage"></p>
<h3>Help function</h3>
<p>1.The parameters for model inference which you can modify to investigate more models.</p>
<textarea class="code" rows="13" cols="100" id="codeEditor" spellcheck="false"></textarea>
<p>2.The function to capture video from camera, and the main loop in which will do inference once.</p>
<textarea class="code" rows="35" cols="100" id="codeEditor1" spellcheck="false"></textarea>
<p>3.Load labels from txt file and process it into an array.</p>
<textarea class="code" rows="7" cols="100" id="codeEditor2" spellcheck="false"></textarea>
<p>4.Get blob from image as input for net, and standardize it with <b>mean</b> and <b>std</b>.</p>
<textarea class="code" rows="17" cols="100" id="codeEditor3" spellcheck="false"></textarea>
<p>5.Fetch model file and save to emscripten file system once click the input button.</p>
<textarea class="code" rows="17" cols="100" id="codeEditor4" spellcheck="false"></textarea>
<p>6.The post-processing, including softmax if needed and get the top classes from the output vector.</p>
<textarea class="code" rows="35" cols="100" id="codeEditor5" spellcheck="false"></textarea>
<div id="appendix">
<h2>Model Info:</h2>
<script src="utils.js" type="text/javascript"></script>
<script src="js_dnn_example_helper.js" type="text/javascript"></script>
<script id="codeSnippet" type="text/code-snippet">
inputSize = [224,224];
mean = [104, 117, 123];
std = 1;
swapRB = false;
// record if need softmax function for post-processing
needSoftmax = false;
// url for label file, can from local or Internet
labelsUrl = "";
<script id="codeSnippet1" type="text/code-snippet">
let frame = new cv.Mat(video.height, video.width, cv.CV_8UC4);
let cap = new cv.VideoCapture(video);
main = async function(frame) {
const labels = await loadLables(labelsUrl);
const input = getBlobFromImage(inputSize, mean, std, swapRB, frame);
let net = cv.readNet(configPath, modelPath);
const start =;
const result = net.forward();
const time =;
const probs = softmax(result);
const classes = getTopClasses(probs, labels);
updateResult(classes, time);
setTimeout(processVideo, 0);
function processVideo() {
try {
if (!streaming) {
} catch (err) {
setTimeout(processVideo, 0);
<script id="codeSnippet5" type="text/code-snippet">
// Turn the raw network output into probabilities.
// Returns result.data32F unchanged when needSoftmax is false; otherwise
// returns a new array holding the softmax of those values.
// `softmax` is assigned, not declared: the page declares it and re-evaluates
// this snippet each time it is edited.
softmax = function(result) {
    const arr = result.data32F;
    if (!needSoftmax) {
        return arr;
    }
    // Subtract the maximum before exponentiating for numerical stability.
    const maxNum = arr.reduce((a, b) => Math.max(a, b), -Infinity);
    // Compute each exponential once and reuse it for the sum and the ratios.
    const exps = Array.from(arr, (value) => Math.exp(value - maxNum));
    const expSum = exps.reduce((a, b) => a + b, 0);
    return exps.map((value) => value / expSum);
}
<script type="text/javascript">
let jsonUrl = "js_image_classification_model_info.json";
drawInfoTable(jsonUrl, 'appendix');
let utils = new Utils('errorMessage');
utils.loadCode('codeSnippet', 'codeEditor');
utils.loadCode('codeSnippet1', 'codeEditor1');
let loadLablesCode = 'loadLables = ' + loadLables.toString();
document.getElementById('codeEditor2').value = loadLablesCode;
let getBlobFromImageCode = 'getBlobFromImage = ' + getBlobFromImage.toString();
document.getElementById('codeEditor3').value = getBlobFromImageCode;
let loadModelCode = 'loadModel = ' + loadModel.toString();
document.getElementById('codeEditor4').value = loadModelCode;
utils.loadCode('codeSnippet5', 'codeEditor5');
let getTopClassesCode = 'getTopClasses = ' + getTopClasses.toString();
document.getElementById('codeEditor5').value += '\n' + '\n' + getTopClassesCode;
let video = document.getElementById('videoInput');
let streaming = false;
let startAndStop = document.getElementById('startAndStop');
startAndStop.addEventListener('click', () => {
if (!streaming) {
utils.startCamera('qvga', onVideoStarted, 'videoInput');
} else {
let configPath = "";
let configFile = document.getElementById('configFile');
configFile.addEventListener('change', async (e) => {
configPath = await loadModel(e);
document.getElementById('status').innerHTML = `The config file '${configPath}' is created successfully.`;
let modelPath = "";
let modelFile = document.getElementById('modelFile');
modelFile.addEventListener('change', async (e) => {
modelPath = await loadModel(e);
document.getElementById('status').innerHTML = `The model file '${modelPath}' is created successfully.`;
configPath = "";
configFile.value = "";
utils.loadOpenCv(() => {
var main = async function(frame) {};
var softmax = function(result){};
var getTopClasses = function(mat, labels, topK = 3){};
function onVideoStarted() {
streaming = true;
startAndStop.innerText = 'Stop';
videoInput.width = videoInput.videoWidth;
videoInput.height = videoInput.videoHeight;
function onVideoStopped() {
streaming = false;
startAndStop.innerText = 'Start';
function updateResult(classes, time) {
classes.forEach((c,i) => {
let labelElement = document.getElementById('label'+i);
let probElement = document.getElementById('prob'+i);
labelElement.innerHTML = c.label;
probElement.innerHTML = c.prob + '%';
let result = document.getElementById('result'); = 'visible';
document.getElementById('status').innerHTML = `<b>Model:</b> ${modelPath}<br>
<b>Inference time:</b> ${time.toFixed(2)} ms`;
} catch(e) {
function initStatus() {
document.getElementById('status').innerHTML = '';
document.getElementById('result').style.visibility = 'hidden';

@ -0,0 +1,387 @@
<!DOCTYPE html>
<meta charset="utf-8">
<title>Object Detection Example</title>
<link href="js_example_style.css" rel="stylesheet" type="text/css" />
<h2>Object Detection Example</h2>
This tutorial shows you how to write an object detection example with OpenCV.js.<br>
To try the example, click the <b>modelFile</b> button (and the <b>configFile</b> button if needed) to upload the inference model.
You can find the model URLs and parameters in the <a href="#appendix">model info</a> section.
Then change the parameters in the first code snippet according to the uploaded model.
Finally, click the <b>Try it</b> button to see the result. You can also choose any other image.<br>
<div class="control"><button id="tryIt" disabled>Try it</button></div>
<table cellpadding="0" cellspacing="0" width="0" border="0">
<canvas id="canvasInput" width="400" height="400"></canvas>
<canvas id="canvasOutput" style="visibility: hidden;" width="400" height="400"></canvas>
<div class="caption">
canvasInput <input type="file" id="fileInput" name="file" accept="image/*">
<p id='status' align="left"></p>
<div class="caption">
modelFile <input type="file" id="modelFile" name="file">
<div class="caption">
configFile <input type="file" id="configFile">
<p class="err" id="errorMessage"></p>
<h3>Help function</h3>
<p>1.The parameters for model inference which you can modify to investigate more models.</p>
<textarea class="code" rows="15" cols="100" id="codeEditor" spellcheck="false"></textarea>
<p>2.Main loop in which will read the image from canvas and do inference once.</p>
<textarea class="code" rows="16" cols="100" id="codeEditor1" spellcheck="false"></textarea>
<p>3.Load labels from txt file and process it into an array.</p>
<textarea class="code" rows="7" cols="100" id="codeEditor2" spellcheck="false"></textarea>
<p>4.Get blob from image as input for net, and standardize it with <b>mean</b> and <b>std</b>.</p>
<textarea class="code" rows="17" cols="100" id="codeEditor3" spellcheck="false"></textarea>
<p>5.Fetch model file and save to emscripten file system once click the input button.</p>
<textarea class="code" rows="17" cols="100" id="codeEditor4" spellcheck="false"></textarea>
<p>6.The post-processing, including get boxes from output and draw boxes into the image.</p>
<textarea class="code" rows="35" cols="100" id="codeEditor5" spellcheck="false"></textarea>
<div id="appendix">
<h2>Model Info:</h2>
<script src="utils.js" type="text/javascript"></script>
<script src="js_dnn_example_helper.js" type="text/javascript"></script>
<script id="codeSnippet" type="text/code-snippet">
inputSize = [300, 300];
mean = [127.5, 127.5, 127.5];
std = 0.007843;
swapRB = false;
confThreshold = 0.5;
nmsThreshold = 0.4;
// The type of output, can be YOLO or SSD
outType = "SSD";
// url for label file, can from local or Internet
labelsUrl = "";
<script id="codeSnippet1" type="text/code-snippet">
// Run one detection pass on the image shown in 'canvasInput' and display
// the annotated output together with the inference time.
// `main` is assigned, not declared: the page declares it and re-evaluates
// this snippet each time it is edited.
main = async function() {
    const labels = await loadLables(labelsUrl);
    const input = getBlobFromImage(inputSize, mean, std, swapRB, 'canvasInput');
    const net = cv.readNet(configPath, modelPath);
    let result = null;
    try {
        net.setInput(input);
        // Time only the forward pass.
        const start = performance.now();
        result = net.forward();
        const time = performance.now() - start;
        const output = postProcess(result, labels);

        updateResult(output, time);
    } finally {
        // OpenCV.js objects live on the WebAssembly heap and need explicit release.
        input.delete();
        net.delete();
        if (result !== null) {
            result.delete();
        }
    }
}
<script id="codeSnippet5" type="text/code-snippet">
postProcess = function(result, labels) {
let canvasOutput = document.getElementById('canvasOutput');
const outputWidth = canvasOutput.width;
const outputHeight = canvasOutput.height;
const resultData = result.data32F;
// Get the boxes(with class and confidence) from the output
let boxes = [];
switch(outType) {
case "YOLO": {
const vecNum = result.matSize[0];
const vecLength = result.matSize[1];
const classNum = vecLength - 5;
for (let i = 0; i < vecNum; ++i) {
let vector = resultData.slice(i*vecLength, (i+1)*vecLength);
let scores = vector.slice(5, vecLength);
let classId = scores.indexOf(Math.max(...scores));
let confidence = scores[classId];
if (confidence > confThreshold) {
let center_x = Math.round(vector[0] * outputWidth);
let center_y = Math.round(vector[1] * outputHeight);
let width = Math.round(vector[2] * outputWidth);
let height = Math.round(vector[3] * outputHeight);
let left = Math.round(center_x - width / 2);
let top = Math.round(center_y - height / 2);
let box = {
scores: scores,
classId: classId,
confidence: confidence,
bounding: [left, top, width, height],
toDraw: true
// NMS(Non Maximum Suppression) algorithm
let boxNum = boxes.length;
let tmp_boxes = [];
let sorted_boxes = [];
for (let c = 0; c < classNum; ++c) {
for (let i = 0; i < boxes.length; ++i) {
tmp_boxes[i] = [boxes[i], i];
sorted_boxes = tmp_boxes.sort((a, b) => { return (b[0].scores[c] - a[0].scores[c]); });
for (let i = 0; i < boxNum; ++i) {
if (sorted_boxes[i][0].scores[c] === 0) continue;
else {
for (let j = i + 1; j < boxNum; ++j) {
if (IOU(sorted_boxes[i][0], sorted_boxes[j][0]) >= nmsThreshold) {
boxes[sorted_boxes[j][1]].toDraw = false;
} break;
case "SSD": {
const vecNum = result.matSize[2];
const vecLength = 7;
for (let i = 0; i < vecNum; ++i) {
let vector = resultData.slice(i*vecLength, (i+1)*vecLength);
let confidence = vector[2];
if (confidence > confThreshold) {
let left, top, right, bottom, width, height;
left = Math.round(vector[3]);
top = Math.round(vector[4]);
right = Math.round(vector[5]);
bottom = Math.round(vector[6]);
width = right - left + 1;
height = bottom - top + 1;
if (width <= 2 || height <= 2) {
left = Math.round(vector[3] * outputWidth);
top = Math.round(vector[4] * outputHeight);
right = Math.round(vector[5] * outputWidth);
bottom = Math.round(vector[6] * outputHeight);
width = right - left + 1;
height = bottom - top + 1;
let box = {
classId: vector[1] - 1,
confidence: confidence,
bounding: [left, top, width, height],
toDraw: true
} break;
console.error(`Unsupported output type ${outType}`)
// Draw the saved box into the image
let image = cv.imread("canvasInput");
let output = new cv.Mat(outputWidth, outputHeight, cv.CV_8UC3);
cv.cvtColor(image, output, cv.COLOR_RGBA2RGB);
let boxNum = boxes.length;
for (let i = 0; i < boxNum; ++i) {
if (boxes[i].toDraw) {
return output;
// Calculate the IOU (Intersection over Union) of two boxes.
// Each box carries `bounding` as [left, top, width, height].
// Returns a value in [0, 1]; 0 when the boxes do not overlap or both are empty.
function IOU(box1, box2) {
    const [left1, top1, width1, height1] = box1.bounding;
    const [left2, top2, width2, height2] = box2.bounding;

    const overlapW = calOverlap([left1, left1 + width1], [left2, left2 + width2]);
    const overlapH = calOverlap([top1, top1 + height1], [top2, top2 + height2]);
    const overlapS = overlapW * overlapH;

    // Union = both areas minus the intersection, which is otherwise counted twice.
    const unionS = width1 * height1 + width2 * height2 - overlapS;
    return unionS > 0 ? overlapS / unionS : 0;
}

// Calculate the length of the overlap between two [min, max] ranges.
// Returns 0 when the ranges are disjoint or only touch at an endpoint.
function calOverlap(range1, range2) {
    const low = Math.max(range1[0], range2[0]);
    const high = Math.min(range1[1], range2[1]);
    return Math.max(0, high - low);
}
// Draw one predict box into the origin image
function drawBox(box) {
let bounding = box.bounding;
let left = bounding[0];
let top = bounding[1];
let width = bounding[2];
let height = bounding[3];
cv.rectangle(output, new cv.Point(left, top), new cv.Point(left + width, top + height),
new cv.Scalar(0, 255, 0));
cv.rectangle(output, new cv.Point(left, top), new cv.Point(left + width, top + 15),
new cv.Scalar(255, 255, 255), cv.FILLED);
let text = `${labels[box.classId]}: ${box.confidence.toFixed(4)}`;
cv.putText(output, text, new cv.Point(left, top + 10), cv.FONT_HERSHEY_SIMPLEX, 0.3,
new cv.Scalar(0, 0, 0));
<script type="text/javascript">
let jsonUrl = "js_object_detection_model_info.json";
drawInfoTable(jsonUrl, 'appendix');
let utils = new Utils('errorMessage');
utils.loadCode('codeSnippet', 'codeEditor');
utils.loadCode('codeSnippet1', 'codeEditor1');
let loadLablesCode = 'loadLables = ' + loadLables.toString();
document.getElementById('codeEditor2').value = loadLablesCode;
let getBlobFromImageCode = 'getBlobFromImage = ' + getBlobFromImage.toString();
document.getElementById('codeEditor3').value = getBlobFromImageCode;
let loadModelCode = 'loadModel = ' + loadModel.toString();
document.getElementById('codeEditor4').value = loadModelCode;
utils.loadCode('codeSnippet5', 'codeEditor5');
let canvas = document.getElementById('canvasInput');
let ctx = canvas.getContext('2d');
let img = new Image();
img.crossOrigin = 'anonymous';
img.src = 'lena.png';
img.onload = function() {
ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
let tryIt = document.getElementById('tryIt');
tryIt.addEventListener('click', () => {
document.getElementById('status').innerHTML = 'Running function main()...';
if (modelPath === "") {
document.getElementById('status').innerHTML = 'Runing failed.';
utils.printError('Please upload model file by clicking the button first.');
} else {
setTimeout(main, 1);
let fileInput = document.getElementById('fileInput');
fileInput.addEventListener('change', (e) => {
loadImageToCanvas(e, 'canvasInput');
let configPath = "";
let configFile = document.getElementById('configFile');
configFile.addEventListener('change', async (e) => {
configPath = await loadModel(e);
document.getElementById('status').innerHTML = `The config file '${configPath}' is created successfully.`;
let modelPath = "";
let modelFile = document.getElementById('modelFile');
modelFile.addEventListener('change', async (e) => {
modelPath = await loadModel(e);
document.getElementById('status').innerHTML = `The model file '${modelPath}' is created successfully.`;
configPath = "";
configFile.value = "";
utils.loadOpenCv(() => {
var main = async function() {};
var postProcess = function(result, labels) {};
function updateResult(output, time) {
let canvasOutput = document.getElementById('canvasOutput'); = "visible";
cv.imshow('canvasOutput', output);
document.getElementById('status').innerHTML = `<b>Model:</b> ${modelPath}<br>
<b>Inference time:</b> ${time.toFixed(2)} ms`;
} catch(e) {
function initStatus() {
document.getElementById('status').innerHTML = '';
document.getElementById('canvasOutput').style.visibility = "hidden";

@ -0,0 +1,39 @@
"caffe": [
"model": "mobilenet_SSD",
"inputSize": "300, 300",
"mean": "127.5, 127.5, 127.5",
"std": "0.007843",
"swapRB": "false",
"outType": "SSD",
"labelsUrl": "",
"modelUrl": "",
"configUrl": ""
"model": "VGG_SSD",
"inputSize": "300, 300",
"mean": "104, 117, 123",
"std": "1",
"swapRB": "false",
"outType": "SSD",
"labelsUrl": "",
"modelUrl": "",
"configUrl": ""
"darknet": [
"model": "yolov2_tiny",
"inputSize": "416, 416",
"mean": "0, 0, 0",
"std": "0.00392",
"swapRB": "false",
"outType": "YOLO",
"labelsUrl": "",
"modelUrl": "",
"configUrl": ""

@ -0,0 +1,402 @@
<!DOCTYPE html>
<meta charset="utf-8">
<title>Object Detection Example with Camera</title>
<link href="js_example_style.css" rel="stylesheet" type="text/css" />
<h2>Object Detection Example with Camera </h2>
This tutorial shows you how to write an object detection example with camera.<br>
To try the example, click the <b>modelFile</b> button (and the <b>configFile</b> button if needed) to upload the inference model.
You can find the model URLs and parameters in the <a href="#appendix">model info</a> section.
Then change the parameters in the first code snippet according to the uploaded model.
Finally, click the <b>Start/Stop</b> button to start or stop the camera capture.<br>
<div class="control"><button id="startAndStop" disabled>Start</button></div>
<table cellpadding="0" cellspacing="0" width="0" border="0">
<video id="videoInput" width="400" height="400"></video>
<canvas id="canvasOutput" style="visibility: hidden;" width="400" height="400"></canvas>
<div class="caption">
<p id='status' align="left"></p>
<div class="caption">
modelFile <input type="file" id="modelFile" name="file">
<div class="caption">
configFile <input type="file" id="configFile">
<p class="err" id="errorMessage"></p>
<h3>Help function</h3>
<p>1.The parameters for model inference which you can modify to investigate more models.</p>
<textarea class="code" rows="15" cols="100" id="codeEditor" spellcheck="false"></textarea>
<p>2.The function to capture video from camera, and the main loop in which will do inference once.</p>
<textarea class="code" rows="34" cols="100" id="codeEditor1" spellcheck="false"></textarea>
<p>3.Load labels from txt file and process it into an array.</p>
<textarea class="code" rows="7" cols="100" id="codeEditor2" spellcheck="false"></textarea>
<p>4.Get blob from image as input for net, and standardize it with <b>mean</b> and <b>std</b>.</p>
<textarea class="code" rows="17" cols="100" id="codeEditor3" spellcheck="false"></textarea>
<p>5.Fetch model file and save to emscripten file system once click the input button.</p>
<textarea class="code" rows="17" cols="100" id="codeEditor4" spellcheck="false"></textarea>
<p>6.The post-processing, including get boxes from output and draw boxes into the image.</p>
<textarea class="code" rows="35" cols="100" id="codeEditor5" spellcheck="false"></textarea>
<div id="appendix">
<h2>Model Info:</h2>
<script src="utils.js" type="text/javascript"></script>
<script src="js_dnn_example_helper.js" type="text/javascript"></script>
<script id="codeSnippet" type="text/code-snippet">
inputSize = [300, 300];
mean = [127.5, 127.5, 127.5];
std = 0.007843;
swapRB = false;
confThreshold = 0.5;
nmsThreshold = 0.4;
// the type of output, can be YOLO or SSD
outType = "SSD";
// url for label file, can from local or Internet
labelsUrl = "";
<script id="codeSnippet1" type="text/code-snippet">
let frame = new cv.Mat(videoInput.height, videoInput.width, cv.CV_8UC4);
let cap = new cv.VideoCapture(videoInput);
main = async function(frame) {
const labels = await loadLables(labelsUrl);
const input = getBlobFromImage(inputSize, mean, std, swapRB, frame);
let net = cv.readNet(configPath, modelPath);
const start =;
const result = net.forward();
const time =;
const output = postProcess(result, labels, frame);
updateResult(output, time);
setTimeout(processVideo, 0);
function processVideo() {
try {
if (!streaming) {
} catch (err) {
setTimeout(processVideo, 0);
<script id="codeSnippet5" type="text/code-snippet">
postProcess = function(result, labels, frame) {
let canvasOutput = document.getElementById('canvasOutput');
const outputWidth = canvasOutput.width;
const outputHeight = canvasOutput.height;
const resultData = result.data32F;
// Get the boxes(with class and confidence) from the output
let boxes = [];
switch(outType) {
case "YOLO": {
const vecNum = result.matSize[0];
const vecLength = result.matSize[1];
const classNum = vecLength - 5;
for (let i = 0; i < vecNum; ++i) {
let vector = resultData.slice(i*vecLength, (i+1)*vecLength);
let scores = vector.slice(5, vecLength);
let classId = scores.indexOf(Math.max(...scores));
let confidence = scores[classId];
if (confidence > confThreshold) {
let center_x = Math.round(vector[0] * outputWidth);
let center_y = Math.round(vector[1] * outputHeight);
let width = Math.round(vector[2] * outputWidth);
let height = Math.round(vector[3] * outputHeight);
let left = Math.round(center_x - width / 2);
let top = Math.round(center_y - height / 2);
let box = {
scores: scores,
classId: classId,
confidence: confidence,
bounding: [left, top, width, height],
toDraw: true
// NMS(Non Maximum Suppression) algorithm
let boxNum = boxes.length;
let tmp_boxes = [];
let sorted_boxes = [];
for (let c = 0; c < classNum; ++c) {
for (let i = 0; i < boxes.length; ++i) {
tmp_boxes[i] = [boxes[i], i];
sorted_boxes = tmp_boxes.sort((a, b) => { return (b[0].scores[c] - a[0].scores[c]); });
for (let i = 0; i < boxNum; ++i) {
if (sorted_boxes[i][0].scores[c] === 0) continue;
else {
for (let j = i + 1; j < boxNum; ++j) {
if (IOU(sorted_boxes[i][0], sorted_boxes[j][0]) >= nmsThreshold) {
boxes[sorted_boxes[j][1]].toDraw = false;
} break;
case "SSD": {
const vecNum = result.matSize[2];
const vecLength = 7;
for (let i = 0; i < vecNum; ++i) {
let vector = resultData.slice(i*vecLength, (i+1)*vecLength);
let confidence = vector[2];
if (confidence > confThreshold) {
let left, top, right, bottom, width, height;
left = Math.round(vector[3]);
top = Math.round(vector[4]);
right = Math.round(vector[5]);
bottom = Math.round(vector[6]);
width = right - left + 1;
height = bottom - top + 1;
if (width <= 2 || height <= 2) {
left = Math.round(vector[3] * outputWidth);
top = Math.round(vector[4] * outputHeight);
right = Math.round(vector[5] * outputWidth);
bottom = Math.round(vector[6] * outputHeight);
width = right - left + 1;
height = bottom - top + 1;
let box = {
classId: vector[1] - 1,
confidence: confidence,
bounding: [left, top, width, height],
toDraw: true
} break;
console.error(`Unsupported output type ${outType}`)
// Draw the saved box into the image
let output = new cv.Mat(outputWidth, outputHeight, cv.CV_8UC3);
cv.cvtColor(frame, output, cv.COLOR_RGBA2RGB);
let boxNum = boxes.length;
for (let i = 0; i < boxNum; ++i) {
if (boxes[i].toDraw) {
return output;
/**
 * Calculate the IoU (Intersection over Union) of two boxes.
 *
 * @param {{bounding: number[]}} box1 - box whose `bounding` is [left, top, width, height].
 * @param {{bounding: number[]}} box2 - second box, same layout as box1.
 * @returns {number} intersection area divided by union area; 0 when the union is empty.
 */
function IOU(box1, box2) {
    const [left1, top1, width1, height1] = box1.bounding;
    const [left2, top2, width2, height2] = box2.bounding;
    const s1 = width1 * height1;
    const s2 = width2 * height2;

    const overlapW = calOverlap([left1, left1 + width1], [left2, left2 + width2]);
    // Each vertical range must come from its own box; using box2's top for both
    // would make the result ignore box1's vertical position.
    const overlapH = calOverlap([top1, top1 + height1], [top2, top2 + height2]);

    const overlapS = overlapW * overlapH;
    // The intersection is counted once in s1 and once in s2, so it is subtracted
    // (not added) to obtain the area of the union.
    const unionS = s1 + s2 - overlapS;
    return unionS > 0 ? overlapS / unionS : 0;
}
/**
 * Calculate the length of the overlap between two 1-D ranges.
 *
 * @param {number[]} range1 - first range as [min, max].
 * @param {number[]} range2 - second range as [min, max].
 * @returns {number} length of the shared part; 0 when the ranges are disjoint or only touch.
 */
function calOverlap(range1, range2) {
    const [min1, max1] = range1;
    const [min2, max2] = range2;

    // The intersection of two intervals is [max(mins), min(maxes)]. This one formula
    // also covers identical ranges and one range fully containing the other, which
    // a chain of strict "endpoint lies inside" comparisons misses.
    return Math.max(0, Math.min(max1, max2) - Math.max(min1, min2));
}
/**
 * Draw one predicted box, with its label and confidence, onto the output image.
 * Reads `output` and `labels` from the enclosing scope.
 *
 * @param {{bounding: number[], classId: number, confidence: number}} box -
 *     box whose `bounding` is [left, top, width, height].
 */
function drawBox(box) {
    const [x, y, boxWidth, boxHeight] = box.bounding;

    // Green outline around the detection.
    cv.rectangle(output, new cv.Point(x, y), new cv.Point(x + boxWidth, y + boxHeight),
        new cv.Scalar(0, 255, 0));

    // White filled strip (15 px high) along the top edge as background for the caption.
    cv.rectangle(output, new cv.Point(x, y), new cv.Point(x + boxWidth, y + 15),
        new cv.Scalar(255, 255, 255), cv.FILLED);

    // Black caption text: "<label>: <confidence with 4 decimals>".
    const caption = `${labels[box.classId]}: ${box.confidence.toFixed(4)}`;
    cv.putText(output, caption, new cv.Point(x, y + 10), cv.FONT_HERSHEY_SIMPLEX, 0.3,
        new cv.Scalar(0, 0, 0));
}
<script type="text/javascript">
let jsonUrl = "js_object_detection_model_info.json";
drawInfoTable(jsonUrl, 'appendix');
let utils = new Utils('errorMessage');
utils.loadCode('codeSnippet', 'codeEditor');
utils.loadCode('codeSnippet1', 'codeEditor1');
let loadLablesCode = 'loadLables = ' + loadLables.toString();
document.getElementById('codeEditor2').value = loadLablesCode;
let getBlobFromImageCode = 'getBlobFromImage = ' + getBlobFromImage.toString();
document.getElementById('codeEditor3').value = getBlobFromImageCode;
let loadModelCode = 'loadModel = ' + loadModel.toString();
document.getElementById('codeEditor4').value = loadModelCode;
utils.loadCode('codeSnippet5', 'codeEditor5');
let videoInput = document.getElementById('videoInput');
let streaming = false;
let startAndStop = document.getElementById('startAndStop');
startAndStop.addEventListener('click', () => {
if (!streaming) {
utils.startCamera('qvga', onVideoStarted, 'videoInput');
} else {
let configPath = "";
let configFile = document.getElementById('configFile');
configFile.addEventListener('change', async (e) => {
configPath = await loadModel(e);
document.getElementById('status').innerHTML = `The config file '${configPath}' is created successfully.`;
let modelPath = "";
let modelFile = document.getElementById('modelFile');
modelFile.addEventListener('change', async (e) => {
modelPath = await loadModel(e);
document.getElementById('status').innerHTML = `The model file '${modelPath}' is created successfully.`;
configPath = "";
configFile.value = "";
utils.loadOpenCv(() => {
var main = async function(frame) {};
var postProcess = function(result, labels, frame) {};
function onVideoStarted() {
streaming = true;
startAndStop.innerText = 'Stop';
videoInput.width = videoInput.videoWidth;
videoInput.height = videoInput.videoHeight;
function onVideoStopped() {
streaming = false;
startAndStop.innerText = 'Start';
function updateResult(output, time) {
let canvasOutput = document.getElementById('canvasOutput'); = "visible";
cv.imshow('canvasOutput', output);
document.getElementById('status').innerHTML = `<b>Model:</b> ${modelPath}<br>
<b>Inference time:</b> ${time.toFixed(2)} ms`;
} catch(e) {
function initStatus() {
document.getElementById('status').innerHTML = '';
document.getElementById('canvasOutput').style.visibility = "hidden";

@ -0,0 +1,327 @@
<!DOCTYPE html>
<meta charset="utf-8">
<title>Pose Estimation Example</title>
<link href="js_example_style.css" rel="stylesheet" type="text/css" />
<h2>Pose Estimation Example</h2>
This tutorial shows you how to write a pose estimation example with OpenCV.js.<br>
To try the example you should click the <b>modelFile</b> button (and the <b>configFile</b> button if needed) to upload the inference model.
You can find the model URLs and parameters in the <a href="#appendix">model info</a> section.
Then you should change the parameters in the first code snippet according to the uploaded model.
Finally, click the <b>Try it</b> button to see the result. You can also choose any other image.<br>
<div class="control"><button id="tryIt" disabled>Try it</button></div>
<table cellpadding="0" cellspacing="0" width="0" border="0">
<canvas id="canvasInput" width="400" height="250"></canvas>
<canvas id="canvasOutput" style="visibility: hidden;" width="400" height="250"></canvas>
<div class="caption">
canvasInput <input type="file" id="fileInput" name="file" accept="image/*">
<p id='status' align="left"></p>
<div class="caption">
modelFile <input type="file" id="modelFile" name="file">
<div class="caption">
configFile <input type="file" id="configFile">
<p class="err" id="errorMessage"></p>
<h3>Help function</h3>
<p>1.The parameters for model inference which you can modify to investigate more models.</p>
<textarea class="code" rows="9" cols="100" id="codeEditor" spellcheck="false"></textarea>
<p>2.The main loop, which reads the image from the canvas and runs inference once.</p>
<textarea class="code" rows="15" cols="100" id="codeEditor1" spellcheck="false"></textarea>
<p>3.Get blob from image as input for net, and standardize it with <b>mean</b> and <b>std</b>.</p>
<textarea class="code" rows="17" cols="100" id="codeEditor2" spellcheck="false"></textarea>
<p>4.Fetch the model file and save it to the emscripten file system once the input button is clicked.</p>
<textarea class="code" rows="17" cols="100" id="codeEditor3" spellcheck="false"></textarea>
<p>5.The pairs of keypoints for the different datasets.</p>
<textarea class="code" rows="30" cols="100" id="codeEditor4" spellcheck="false"></textarea>
<p>6.The post-processing, including getting the predicted points and drawing lines onto the image.</p>
<textarea class="code" rows="30" cols="100" id="codeEditor5" spellcheck="false"></textarea>
<div id="appendix">
<h2>Model Info:</h2>
<script src="utils.js" type="text/javascript"></script>
<script src="js_dnn_example_helper.js" type="text/javascript"></script>
<script id="codeSnippet" type="text/code-snippet">
inputSize = [368, 368];
mean = [0, 0, 0];
std = 0.00392;
swapRB = false;
threshold = 0.1;
// the pairs of keypoint, can be "COCO", "MPI" and "BODY_25"
dataset = "COCO";
<script id="codeSnippet1" type="text/code-snippet">
main = async function() {
const input = getBlobFromImage(inputSize, mean, std, swapRB, 'canvasInput');
let net = cv.readNet(configPath, modelPath);
const start =;
const result = net.forward();
const time =;
const output = postProcess(result);
updateResult(output, time);
<script id="codeSnippet4" type="text/code-snippet">
// Keypoint tables for each supported dataset, selected by `dataset` (set in the
// parameter snippet):
//   BODY_PARTS - keypoint name -> index used by the post-processing to look up points.
//   POSE_PAIRS - pairs of keypoint names that are connected by a line when drawing.
if (dataset === 'COCO') {
    BODY_PARTS = { "Nose": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4,
                   "LShoulder": 5, "LElbow": 6, "LWrist": 7, "RHip": 8, "RKnee": 9,
                   "RAnkle": 10, "LHip": 11, "LKnee": 12, "LAnkle": 13, "REye": 14,
                   "LEye": 15, "REar": 16, "LEar": 17, "Background": 18 };

    POSE_PAIRS = [ ["Neck", "RShoulder"], ["Neck", "LShoulder"], ["RShoulder", "RElbow"],
                   ["RElbow", "RWrist"], ["LShoulder", "LElbow"], ["LElbow", "LWrist"],
                   ["Neck", "RHip"], ["RHip", "RKnee"], ["RKnee", "RAnkle"], ["Neck", "LHip"],
                   ["LHip", "LKnee"], ["LKnee", "LAnkle"], ["Neck", "Nose"], ["Nose", "REye"],
                   ["REye", "REar"], ["Nose", "LEye"], ["LEye", "LEar"] ];
} else if (dataset === 'MPI') {
    BODY_PARTS = { "Head": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4,
                   "LShoulder": 5, "LElbow": 6, "LWrist": 7, "RHip": 8, "RKnee": 9,
                   "RAnkle": 10, "LHip": 11, "LKnee": 12, "LAnkle": 13, "Chest": 14,
                   "Background": 15 };

    POSE_PAIRS = [ ["Head", "Neck"], ["Neck", "RShoulder"], ["RShoulder", "RElbow"],
                   ["RElbow", "RWrist"], ["Neck", "LShoulder"], ["LShoulder", "LElbow"],
                   ["LElbow", "LWrist"], ["Neck", "Chest"], ["Chest", "RHip"], ["RHip", "RKnee"],
                   ["RKnee", "RAnkle"], ["Chest", "LHip"], ["LHip", "LKnee"], ["LKnee", "LAnkle"] ];
} else if (dataset === 'BODY_25') {
    BODY_PARTS = { "Nose": 0, "Neck": 1, "RShoulder": 2, "RElbow": 3, "RWrist": 4,
                   "LShoulder": 5, "LElbow": 6, "LWrist": 7, "MidHip": 8, "RHip": 9,
                   "RKnee": 10, "RAnkle": 11, "LHip": 12, "LKnee": 13, "LAnkle": 14,
                   "REye": 15, "LEye": 16, "REar": 17, "LEar": 18, "LBigToe": 19,
                   "LSmallToe": 20, "LHeel": 21, "RBigToe": 22, "RSmallToe": 23,
                   "RHeel": 24, "Background": 25 };

    // The left eye hangs off the nose, mirroring the ["Nose", "REye"] edge
    // (and matching the COCO table above); it is not connected to the neck.
    POSE_PAIRS = [ ["Neck", "Nose"], ["Neck", "RShoulder"],
                   ["Neck", "LShoulder"], ["RShoulder", "RElbow"],
                   ["RElbow", "RWrist"], ["LShoulder", "LElbow"],
                   ["LElbow", "LWrist"], ["Nose", "REye"],
                   ["REye", "REar"], ["Nose", "LEye"],
                   ["LEye", "LEar"], ["Neck", "MidHip"],
                   ["MidHip", "RHip"], ["RHip", "RKnee"],
                   ["RKnee", "RAnkle"], ["RAnkle", "RBigToe"],
                   ["RBigToe", "RSmallToe"], ["RAnkle", "RHeel"],
                   ["MidHip", "LHip"], ["LHip", "LKnee"],
                   ["LKnee", "LAnkle"], ["LAnkle", "LBigToe"],
                   ["LBigToe", "LSmallToe"], ["LAnkle", "LHeel"] ];
}
<script id="codeSnippet5" type="text/code-snippet">
/**
 * Turn the network output into an image with the detected skeleton drawn on it.
 *
 * For every entry of BODY_PARTS the corresponding heat map in `result` is searched
 * for its maximum; if that maximum exceeds `threshold` the location is scaled to the
 * size of the output canvas and kept as the keypoint. Every POSE_PAIRS edge whose two
 * keypoints were found is then drawn onto a copy of the input canvas image.
 *
 * @param {cv.Mat} result - network output; `matSize[2]` x `matSize[3]` is the size of
 *     one heat map and `data32F` holds the maps back to back.
 * @returns {cv.Mat} image with lines and keypoints drawn. The caller owns the Mat.
 */
postProcess = function(result) {
    const resultData = result.data32F;
    const matSize = result.matSize;
    const mapHeight = matSize[2];
    const mapWidth = matSize[3];
    const mapSize = mapHeight * mapWidth;

    const canvasOutput = document.getElementById('canvasOutput');
    const outputWidth = canvasOutput.width;
    const outputHeight = canvasOutput.height;

    const image = cv.imread("canvasInput");
    // cv.Mat takes (rows, cols, type), i.e. height first.
    const output = new cv.Mat(outputHeight, outputWidth, cv.CV_8UC3);
    cv.cvtColor(image, output, cv.COLOR_RGBA2RGB);
    // Mats live on the emscripten heap and are not garbage collected.
    image.delete();

    // get position of keypoints from output
    const points = [];
    const partCount = Object.keys(BODY_PARTS).length;
    for (let i = 0; i < partCount; ++i) {
        const heatMap = resultData.slice(i * mapSize, (i + 1) * mapSize);

        // Linear scan for the maximum; numeric indices keep maxIndex a number.
        let maxIndex = 0;
        let maxConf = heatMap[0];
        for (let index = 1; index < heatMap.length; ++index) {
            if (heatMap[index] > maxConf) {
                maxIndex = index;
                maxConf = heatMap[index];
            }
        }

        if (maxConf > threshold) {
            // Split the flat index into column and (integer) row of the heat map;
            // without the floor the column would leak into the y coordinate.
            const indexX = maxIndex % mapWidth;
            const indexY = Math.floor(maxIndex / mapWidth);

            const x = outputWidth * indexX / mapWidth;
            const y = outputHeight * indexY / mapHeight;

            points[i] = [Math.round(x), Math.round(y)];
        }
    }

    // draw the points and lines into the image
    for (const [partFrom, partTo] of POSE_PAIRS) {
        const pointFrom = points[BODY_PARTS[partFrom]];
        const pointTo = points[BODY_PARTS[partTo]];

        // Only draw an edge when both of its keypoints were detected.
        if (pointFrom && pointTo) {
            cv.line(output, new cv.Point(pointFrom[0], pointFrom[1]),
                new cv.Point(pointTo[0], pointTo[1]), new cv.Scalar(0, 255, 0), 3);
            cv.ellipse(output, new cv.Point(pointFrom[0], pointFrom[1]), new cv.Size(3, 3), 0, 0, 360,
                new cv.Scalar(0, 0, 255), cv.FILLED);
            cv.ellipse(output, new cv.Point(pointTo[0], pointTo[1]), new cv.Size(3, 3), 0, 0, 360,
                new cv.Scalar(0, 0, 255), cv.FILLED);
        }
    }

    return output;
};
<script type="text/javascript">
let jsonUrl = "js_pose_estimation_model_info.json";
drawInfoTable(jsonUrl, 'appendix');
let utils = new Utils('errorMessage');
utils.loadCode('codeSnippet', 'codeEditor');
utils.loadCode('codeSnippet1', 'codeEditor1');
let getBlobFromImageCode = 'getBlobFromImage = ' + getBlobFromImage.toString();
document.getElementById('codeEditor2').value = getBlobFromImageCode;
let loadModelCode = 'loadModel = ' + loadModel.toString();
document.getElementById('codeEditor3').value = loadModelCode;
utils.loadCode('codeSnippet4', 'codeEditor4');
utils.loadCode('codeSnippet5', 'codeEditor5');
let canvas = document.getElementById('canvasInput');
let ctx = canvas.getContext('2d');
let img = new Image();
img.crossOrigin = 'anonymous';
img.src = 'roi.jpg';
img.onload = function() {
ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
let tryIt = document.getElementById('tryIt');
tryIt.addEventListener('click', () => {
document.getElementById('status').innerHTML = 'Running function main()...';
if (modelPath === "") {
document.getElementById('status').innerHTML = 'Runing failed.';
utils.printError('Please upload model file by clicking the button first.');
} else {
setTimeout(main, 1);
let fileInput = document.getElementById('fileInput');
fileInput.addEventListener('change', (e) => {
loadImageToCanvas(e, 'canvasInput');
let configPath = "";
let configFile = document.getElementById('configFile');
configFile.addEventListener('change', async (e) => {
configPath = await loadModel(e);
document.getElementById('status').innerHTML = `The config file '${configPath}' is created successfully.`;
let modelPath = "";
let modelFile = document.getElementById('modelFile');
modelFile.addEventListener('change', async (e) => {
modelPath = await loadModel(e);
document.getElementById('status').innerHTML = `The model file '${modelPath}' is created successfully.`;
configPath = "";
configFile.value = "";
utils.loadOpenCv(() => {
var main = async function() {};
var postProcess = function(result) {};
function updateResult(output, time) {
let canvasOutput = document.getElementById('canvasOutput'); = "visible";
let resized = new cv.Mat(canvasOutput.width, canvasOutput.height, cv.CV_8UC4);
cv.resize(output, resized, new cv.Size(canvasOutput.width, canvasOutput.height));
cv.imshow('canvasOutput', resized);
document.getElementById('status').innerHTML = `<b>Model:</b> ${modelPath}<br>
<b>Inference time:</b> ${time.toFixed(2)} ms`;
} catch(e) {
function initStatus() {
document.getElementById('status').innerHTML = '';
document.getElementById('canvasOutput').style.visibility = "hidden";

@ -0,0 +1,34 @@
"caffe": [
"model": "body_25",
"inputSize": "368, 368",
"mean": "0, 0, 0",
"std": "0.00392",
"swapRB": "false",
"dataset": "BODY_25",
"modelUrl": "",
"configUrl": ""
"model": "coco",
"inputSize": "368, 368",
"mean": "0, 0, 0",
"std": "0.00392",
"swapRB": "false",
"dataset": "COCO",
"modelUrl": "",
"configUrl": ""
"model": "mpi",
"inputSize": "368, 368",
"mean": "0, 0, 0",
"std": "0.00392",
"swapRB": "false",
"dataset": "MPI",
"modelUrl": "",
"configUrl": ""

@ -0,0 +1,243 @@
<!DOCTYPE html>
<meta charset="utf-8">
<title>Semantic Segmentation Example</title>
<link href="js_example_style.css" rel="stylesheet" type="text/css" />
<h2>Semantic Segmentation Example</h2>
This tutorial shows you how to write a semantic segmentation example with OpenCV.js.<br>
To try the example you should click the <b>modelFile</b> button (and the <b>configFile</b> button if needed) to upload the inference model.
You can find the model URLs and parameters in the <a href="#appendix">model info</a> section.
Then you should change the parameters in the first code snippet according to the uploaded model.
Finally, click the <b>Try it</b> button to see the result. You can also choose any other image.<br>
<div class="control"><button id="tryIt" disabled>Try it</button></div>
<table cellpadding="0" cellspacing="0" width="0" border="0">
<canvas id="canvasInput" width="400" height="400"></canvas>
<canvas id="canvasOutput" style="visibility: hidden;" width="400" height="400"></canvas>
<div class="caption">
canvasInput <input type="file" id="fileInput" name="file" accept="image/*">
<p id='status' align="left"></p>
<div class="caption">
modelFile <input type="file" id="modelFile" name="file">
<div class="caption">
configFile <input type="file" id="configFile">
<p class="err" id="errorMessage"></p>
<h3>Help function</h3>
<p>1.The parameters for model inference which you can modify to investigate more models.</p>
<textarea class="code" rows="5" cols="100" id="codeEditor" spellcheck="false"></textarea>
<p>2.The main loop, which reads the image from the canvas and runs inference once.</p>
<textarea class="code" rows="16" cols="100" id="codeEditor1" spellcheck="false"></textarea>
<p>3.Get blob from image as input for net, and standardize it with <b>mean</b> and <b>std</b>.</p>
<textarea class="code" rows="17" cols="100" id="codeEditor2" spellcheck="false"></textarea>
<p>4.Fetch the model file and save it to the emscripten file system once the input button is clicked.</p>
<textarea class="code" rows="17" cols="100" id="codeEditor3" spellcheck="false"></textarea>
<p>5.The post-processing, including generating colors for the different classes and an argmax to get the class of each pixel.</p>
<textarea class="code" rows="34" cols="100" id="codeEditor4" spellcheck="false"></textarea>
<div id="appendix">
<h2>Model Info:</h2>
<script src="utils.js" type="text/javascript"></script>
<script src="js_dnn_example_helper.js" type="text/javascript"></script>
<script id="codeSnippet" type="text/code-snippet">
inputSize = [513, 513];
mean = [127.5, 127.5, 127.5];
std = 0.007843;
swapRB = false;
<script id="codeSnippet1" type="text/code-snippet">
main = async function() {
const input = getBlobFromImage(inputSize, mean, std, swapRB, 'canvasInput');
let net = cv.readNet(configPath, modelPath);
const start =;
const result = net.forward();
const time =;
const colors = generateColors(result);
const output = argmax(result, colors);
updateResult(output, time);
<script id="codeSnippet4" type="text/code-snippet">
/**
 * Build a flat color table with three values per class.
 *
 * The first triple is [0, 0, 0]; every further value is the rounded mean of a
 * random number in [0, 255) and the value three positions earlier in the table.
 *
 * @param {cv.Mat} result - network output; `matSize[1]` is taken as the class count.
 * @returns {number[]} color table of length `3 * result.matSize[1]` (at least 3).
 */
generateColors = function(result) {
    const valueCount = result.matSize[1] * 3;
    const palette = [0, 0, 0];
    for (let k = palette.length; k < valueCount; ++k) {
        const previous = palette[k - 3];
        palette.push(Math.round((Math.random() * 255 + previous) / 2));
    }
    return palette;
};
argmax = function(result, colors) {
const C = result.matSize[1];
const H = result.matSize[2];
const W = result.matSize[3];
const resultData = result.data32F;
const imgSize = H*W;
let classId = [];
for (i = 0; i<imgSize; ++i) {
let id = 0;
for (j = 0; j < C; ++j) {
if (resultData[j*imgSize+i] > resultData[id*imgSize+i]) {
id = j;
output = cv.matFromArray(H,W,cv.CV_8UC4,classId);
return output;
<script type="text/javascript">
let jsonUrl = "js_semantic_segmentation_model_info.json";
drawInfoTable(jsonUrl, 'appendix');
let utils = new Utils('errorMessage');
utils.loadCode('codeSnippet', 'codeEditor');
utils.loadCode('codeSnippet1', 'codeEditor1');
let getBlobFromImageCode = 'getBlobFromImage = ' + getBlobFromImage.toString();
document.getElementById('codeEditor2').value = getBlobFromImageCode;
let loadModelCode = 'loadModel = ' + loadModel.toString();
document.getElementById('codeEditor3').value = loadModelCode;
utils.loadCode('codeSnippet4', 'codeEditor4');
let canvas = document.getElementById('canvasInput');
let ctx = canvas.getContext('2d');
let img = new Image();
img.crossOrigin = 'anonymous';
img.src = 'roi.jpg';
img.onload = function() {
ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
let tryIt = document.getElementById('tryIt');
tryIt.addEventListener('click', () => {
document.getElementById('status').innerHTML = 'Running function main()...';
if (modelPath === "") {
document.getElementById('status').innerHTML = 'Runing failed.';
utils.printError('Please upload model file by clicking the button first.');
} else {
setTimeout(main, 1);
let fileInput = document.getElementById('fileInput');
fileInput.addEventListener('change', (e) => {
loadImageToCanvas(e, 'canvasInput');
let configPath = "";
let configFile = document.getElementById('configFile');
configFile.addEventListener('change', async (e) => {
configPath = await loadModel(e);
document.getElementById('status').innerHTML = `The config file '${configPath}' is created successfully.`;
let modelPath = "";
let modelFile = document.getElementById('modelFile');
modelFile.addEventListener('change', async (e) => {
modelPath = await loadModel(e);
document.getElementById('status').innerHTML = `The model file '${modelPath}' is created successfully.`;
configPath = "";
configFile.value = "";
utils.loadOpenCv(() => {
var main = async function() {};
var generateColors = function(result) {};
var argmax = function(result, colors) {};
function updateResult(output, time) {
let canvasOutput = document.getElementById('canvasOutput'); = "visible";
let resized = new cv.Mat(canvasOutput.width, canvasOutput.height, cv.CV_8UC4);
cv.resize(output, resized, new cv.Size(canvasOutput.width, canvasOutput.height));
cv.imshow('canvasOutput', resized);
document.getElementById('status').innerHTML = `<b>Model:</b> ${modelPath}<br>
<b>Inference time:</b> ${time.toFixed(2)} ms`;
} catch(e) {
function initStatus() {
document.getElementById('status').innerHTML = '';
document.getElementById('canvasOutput').style.visibility = "hidden";

@ -0,0 +1,12 @@
"tensorflow": [
"model": "deeplabv3",
"inputSize": "513, 513",
"mean": "127.5, 127.5, 127.5",
"std": "0.007843",
"swapRB": "false",
"modelUrl": ""

@ -0,0 +1,228 @@
<!DOCTYPE html>
<meta charset="utf-8">
<title>Style Transfer Example</title>
<link href="js_example_style.css" rel="stylesheet" type="text/css" />
<h2>Style Transfer Example</h2>
This tutorial shows you how to write a style transfer example with OpenCV.js.<br>
To try the example you should click the <b>modelFile</b> button (and the <b>configFile</b> button if needed) to upload the inference model.
You can find the model URLs and parameters in the <a href="#appendix">model info</a> section.
Then you should change the parameters in the first code snippet according to the uploaded model.
Finally, click the <b>Try it</b> button to see the result. You can also choose any other image.<br>
<div class="control"><button id="tryIt" disabled>Try it</button></div>
<table cellpadding="0" cellspacing="0" width="0" border="0">
<canvas id="canvasInput" width="400" height="400"></canvas>
<canvas id="canvasOutput" style="visibility: hidden;" width="400" height="400"></canvas>
<div class="caption">
canvasInput <input type="file" id="fileInput" name="file" accept="image/*">
<p id='status' align="left"></p>
<div class="caption">
modelFile <input type="file" id="modelFile" name="file">
<div class="caption">
configFile <input type="file" id="configFile">
<p class="err" id="errorMessage"></p>
<h3>Help function</h3>
<p>1.The parameters for model inference which you can modify to investigate more models.</p>
<textarea class="code" rows="5" cols="100" id="codeEditor" spellcheck="false"></textarea>
<p>2.The main loop, which reads the image from the canvas and runs inference once.</p>
<textarea class="code" rows="15" cols="100" id="codeEditor1" spellcheck="false"></textarea>
<p>3.Get blob from image as input for net, and standardize it with <b>mean</b> and <b>std</b>.</p>
<textarea class="code" rows="17" cols="100" id="codeEditor2" spellcheck="false"></textarea>
<p>4.Fetch the model file and save it to the emscripten file system once the input button is clicked.</p>
<textarea class="code" rows="17" cols="100" id="codeEditor3" spellcheck="false"></textarea>
<p>5.The post-processing, including scaling and reordering.</p>
<textarea class="code" rows="21" cols="100" id="codeEditor4" spellcheck="false"></textarea>
<div id="appendix">
<h2>Model Info:</h2>
<script src="utils.js" type="text/javascript"></script>
<script src="js_dnn_example_helper.js" type="text/javascript"></script>
<script id="codeSnippet" type="text/code-snippet">
inputSize = [224, 224];
mean = [104, 117, 123];
std = 1;
swapRB = false;
<script id="codeSnippet1" type="text/code-snippet">
main = async function() {
const input = getBlobFromImage(inputSize, mean, std, swapRB, 'canvasInput');
let net = cv.readNet(configPath, modelPath);
const start =;
const result = net.forward();
const time =;
const output = postProcess(result);
updateResult(output, time);
<script id="codeSnippet4" type="text/code-snippet">
postProcess = function(result) {
const resultData = result.data32F;
const C = result.matSize[1];
const H = result.matSize[2];
const W = result.matSize[3];
const mean = [104, 117, 123];
let normData = [];
for (let h = 0; h < H; ++h) {
for (let w = 0; w < W; ++w) {
for (let c = 0; c < C; ++c) {
normData.push(resultData[c*H*W + h*W + w] + mean[c]);
let output = new cv.matFromArray(H, W, cv.CV_8UC4, normData);
return output;
<script type="text/javascript">
let jsonUrl = "js_style_transfer_model_info.json";
drawInfoTable(jsonUrl, 'appendix');
let utils = new Utils('errorMessage');
utils.loadCode('codeSnippet', 'codeEditor');
utils.loadCode('codeSnippet1', 'codeEditor1');
let getBlobFromImageCode = 'getBlobFromImage = ' + getBlobFromImage.toString();
document.getElementById('codeEditor2').value = getBlobFromImageCode;
let loadModelCode = 'loadModel = ' + loadModel.toString();
document.getElementById('codeEditor3').value = loadModelCode;
utils.loadCode('codeSnippet4', 'codeEditor4');
let canvas = document.getElementById('canvasInput');
let ctx = canvas.getContext('2d');
let img = new Image();
img.crossOrigin = 'anonymous';
img.src = 'lena.png';
img.onload = function() {
ctx.drawImage(img, 0, 0, canvas.width, canvas.height);
let tryIt = document.getElementById('tryIt');
tryIt.addEventListener('click', () => {
document.getElementById('status').innerHTML = 'Running function main()...';
if (modelPath === "") {
document.getElementById('status').innerHTML = 'Runing failed.';
utils.printError('Please upload model file by clicking the button first.');
} else {
setTimeout(main, 1);
let fileInput = document.getElementById('fileInput');
fileInput.addEventListener('change', (e) => {
loadImageToCanvas(e, 'canvasInput');
let configPath = "";
let configFile = document.getElementById('configFile');
configFile.addEventListener('change', async (e) => {
configPath = await loadModel(e);
document.getElementById('status').innerHTML = `The config file '${configPath}' is created successfully.`;
let modelPath = "";
let modelFile = document.getElementById('modelFile');
modelFile.addEventListener('change', async (e) => {
modelPath = await loadModel(e);
document.getElementById('status').innerHTML = `The model file '${modelPath}' is created successfully.`;
configPath = "";
configFile.value = "";
utils.loadOpenCv(() => {
var main = async function() {};
var postProcess = function(result) {};
function updateResult(output, time) {
let canvasOutput = document.getElementById('canvasOutput'); = "visible";
let resized = new cv.Mat(canvasOutput.width, canvasOutput.height, cv.CV_8UC4);
cv.resize(output, resized, new cv.Size(canvasOutput.width, canvasOutput.height));
cv.imshow('canvasOutput', resized);
document.getElementById('status').innerHTML = `<b>Model:</b> ${modelPath}<br>
<b>Inference time:</b> ${time.toFixed(2)} ms`;
} catch(e) {
function initStatus() {
document.getElementById('status').innerHTML = '';
document.getElementById('canvasOutput').style.visibility = "hidden";

@ -0,0 +1,76 @@
"torch": [
"model": "candy.t7",
"inputSize": "224, 224",
"mean": "104, 117, 123",
"std": "1",
"swapRB": "false",
"modelUrl": ""
"model": "composition_vii.t7",
"inputSize": "224, 224",
"mean": "104, 117, 123",
"std": "1",
"swapRB": "false",
"modelUrl": ""
"model": "feathers.t7",
"inputSize": "224, 224",
"mean": "104, 117, 123",
"std": "1",
"swapRB": "false",
"modelUrl": ""
"model": "la_muse.t7",
"inputSize": "224, 224",
"mean": "104, 117, 123",
"std": "1",
"swapRB": "false",
"modelUrl": ""
"model": "mosaic.t7",
"inputSize": "224, 224",
"mean": "104, 117, 123",
"std": "1",
"swapRB": "false",
"modelUrl": ""
"model": "starry_night.t7",
"inputSize": "224, 224",
"mean": "104, 117, 123",
"std": "1",
"swapRB": "false",
"modelUrl": ""
"model": "the_scream.t7",
"inputSize": "224, 224",
"mean": "104, 117, 123",
"std": "1",
"swapRB": "false",
"modelUrl": ""
"model": "the_wave.t7",
"inputSize": "224, 224",
"mean": "104, 117, 123",
"std": "1",
"swapRB": "false",
"modelUrl": ""
"model": "udnie.t7",
"inputSize": "224, 224",
"mean": "104, 117, 123",
"std": "1",
"swapRB": "false",
"modelUrl": ""

@ -7,7 +7,7 @@ function Utils(errorOutputId) { // eslint-disable-line no-unused-vars
let script = document.createElement('script');
script.setAttribute('async', '');
script.setAttribute('type', 'text/javascript');
script.addEventListener('load', () => {
script.addEventListener('load', async () => {
if (cv.getBuildInformation)
@ -16,9 +16,15 @@ function Utils(errorOutputId) { // eslint-disable-line no-unused-vars
if (cv instanceof Promise) {
cv = await cv;
} else {

@ -0,0 +1,13 @@
Image Classification Example {#tutorial_js_image_classification}
- In this tutorial you will learn how to use OpenCV.js dnn module for image classification.
<iframe src="../../js_image_classification.html" width="100%"
onload=" +'px';">

@ -0,0 +1,15 @@
Image Classification Example with Camera {#tutorial_js_image_classification_with_camera}
- In this tutorial you will learn how to use OpenCV.js dnn module for image classification example with camera.
@note If you don't know how to capture video from camera, please review @ref tutorial_js_video_display.
<iframe src="../../js_image_classification_with_camera.html" width="100%"
onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">

@ -0,0 +1,13 @@
Object Detection Example {#tutorial_js_object_detection}
- In this tutorial you will learn how to use OpenCV.js dnn module for object detection.
<iframe src="../../js_object_detection.html" width="100%"
onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">

@ -0,0 +1,13 @@
Object Detection Example with Camera {#tutorial_js_object_detection_with_camera}
- In this tutorial you will learn how to use OpenCV.js dnn module for object detection with camera.
<iframe src="../../js_object_detection_with_camera.html" width="100%"
onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">

@ -0,0 +1,13 @@
Pose Estimation Example {#tutorial_js_pose_estimation}
- In this tutorial you will learn how to use OpenCV.js dnn module for pose estimation.
<iframe src="../../js_pose_estimation.html" width="100%"
onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">

@ -0,0 +1,13 @@
Semantic Segmentation Example {#tutorial_js_semantic_segmentation}
- In this tutorial you will learn how to use OpenCV.js dnn module for semantic segmentation.
<iframe src="../../js_semantic_segmentation.html" width="100%"
onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">

@ -0,0 +1,13 @@
Style Transfer Example {#tutorial_js_style_transfer}
- In this tutorial you will learn how to use OpenCV.js dnn module for style transfer.
<iframe src="../../js_style_transfer.html" width="100%"
onload="this.style.height=this.contentDocument.body.scrollHeight +'px';">

@ -0,0 +1,30 @@
Deep Neural Networks (dnn module) {#tutorial_js_table_of_contents_dnn}
- @subpage tutorial_js_image_classification
Image classification example
- @subpage tutorial_js_image_classification_with_camera
Image classification example with camera
- @subpage tutorial_js_object_detection
Object detection example
- @subpage tutorial_js_object_detection_with_camera
Object detection example with camera
- @subpage tutorial_js_semantic_segmentation
Semantic segmentation example
- @subpage tutorial_js_style_transfer
Style transfer example
- @subpage tutorial_js_pose_estimation
Pose estimation example

@ -26,3 +26,7 @@ OpenCV.js Tutorials {#tutorial_js_root}
In this section you
will learn object detection techniques like face detection etc.
- @subpage tutorial_js_table_of_contents_dnn
These tutorials show how to use dnn module in JavaScript
