#!/usr/bin/env python import os, sys, fnmatch, re sys.path.append("../modules/python/src2/") sys.path.append("../modules/java/generator") import hdr_parser as hp import rst_parser as rp rp.show_warnings = False rp.show_errors = False allmodules = rp.allmodules DOCUMENTED_MARKER = "verified" ERROR_001_NOTACLASS = 1 ERROR_002_NOTASTRUCT = 2 ERROR_003_INCORRECTBASE = 3 ERROR_004_MISSEDNAMESPACE = 4 ERROR_005_MISSINGPYFUNC = 5 ERROR_006_INVALIDPYOLDDOC = 6 ERROR_007_INVALIDPYDOC = 7 ERROR_008_CFUNCISNOTGLOBAL = 8 ERROR_009_OVERLOADNOTFOUND = 9 ERROR_010_UNKNOWNCLASS = 10 ERROR_011_UNKNOWNFUNC = 11 do_python_crosscheck = True errors_disabled = [ERROR_004_MISSEDNAMESPACE] doc_signatures_whitelist = [ # templates "Matx", "Vec", "SparseMat_", "Scalar_", "Mat_", "Ptr", "Size_", "Point_", "Rect_", "Point3_", "DataType", "detail::RotationWarperBase", "flann::Index_", "CalonderDescriptorExtractor", "cuda::PtrStepSz", "cuda::PtrStep", "cuda::PtrElemStep_", # black boxes "CvArr", "CvFileStorage", # other "InputArray", "OutputArray", ] defines = ["cvGraphEdgeIdx", "cvFree", "CV_Assert", "cvSqrt", "cvGetGraphVtx", "cvGraphVtxIdx", "cvCaptureFromFile", "cvCaptureFromCAM", "cvCalcBackProjectPatch", "cvCalcBackProject", "cvGetHistValue_1D", "cvGetHistValue_2D", "cvGetHistValue_3D", "cvGetHistValue_nD", "cvQueryHistValue_1D", "cvQueryHistValue_2D", "cvQueryHistValue_3D", "cvQueryHistValue_nD", # not a real function but behaves as function "Mat::size", # ugly "virtual" functions from ml module "CvStatModel::train", "CvStatModel::predict", # TODO: "cvExtractSURF" ] synonims = { "StarDetector" : ["StarFeatureDetector"], "MSER" : ["MserFeatureDetector"], "GFTTDetector" : ["GoodFeaturesToTrackDetector"], "cvCaptureFromFile" : ["cvCreateFileCapture"], "cvCaptureFromCAM" : ["cvCreateCameraCapture"], "cvCalcArrBackProjectPatch" : ["cvCalcBackProjectPatch"], "cvCalcArrBackProject" : ["cvCalcBackProject"], "InputArray" : ["_InputArray"], "OutputArray" : ["_OutputArray"], } if do_python_crosscheck: try: import cv2 except ImportError: print "Could not load cv2" do_python_crosscheck = False def get_cv2_object(name): if name.startswith("cv2."): name = name[4:] if name.startswith("cv."): name = name[3:] if name == "Algorithm": return cv2.Algorithm__create("Feature2D.ORB"), name elif name == "FeatureDetector": return cv2.FeatureDetector_create("ORB"), name elif name == "DescriptorExtractor": return cv2.DescriptorExtractor_create("ORB"), name elif name == "BackgroundSubtractor": return cv2.createBackgroundSubtractorMOG(), name elif name == "StatModel": return cv2.KNearest(), name else: try: obj = getattr(cv2, name)() except AttributeError: obj = getattr(cv2, "create" + name)() return obj, name def compareSignatures(f, s): # function names if f[0] != s[0]: return False, "name mismatch" # return type stype = (s[1] or "void") ftype = f[1] stype = re.sub(r"\b(cv|std)::", "", stype) if ftype: ftype = re.sub(r"\b(cv|std)::", "", ftype) if ftype and ftype != stype: return False, "return type mismatch" if ("\C" in f[2]) ^ ("\C" in s[2]): return False, "const qualifier mismatch" if ("\S" in f[2]) ^ ("\S" in s[2]): return False, "static qualifier mismatch" if ("\V" in f[2]) ^ ("\V" in s[2]): return False, "virtual qualifier mismatch" if ("\A" in f[2]) ^ ("\A" in s[2]): return False, "abstract qualifier mismatch" if len(f[3]) != len(s[3]): return False, "different number of arguments" for idx, arg in enumerate(zip(f[3], s[3])): farg = arg[0] sarg = arg[1] ftype = re.sub(r"\b(cv|std)::", "", (farg[0] or "")) stype = re.sub(r"\b(cv|std)::", "", (sarg[0] or "")) ftype = re.sub(r"\s+(\*|&)$", "\\1", ftype) stype = re.sub(r"\s+(\*|&)$", "\\1", stype) if ftype != stype: return False, "type of argument #" + str(idx+1) + " mismatch" fname = farg[1] or "arg" + str(idx) sname = sarg[1] or "arg" + str(idx) if fname != sname: return False, "name of argument #" + str(idx+1) + " mismatch" fdef = re.sub(r"\b(cv|std)::", "", (farg[2] or "")) sdef = re.sub(r"\b(cv|std)::", "", (sarg[2] or "")) if fdef != sdef: return False, "default value of argument #" + str(idx+1) + " mismatch" return True, "match" def formatSignature(s): _str = "" if "/V" in s[2]: _str += "virtual " if "/S" in s[2]: _str += "static " if s[1]: _str += s[1] + " " else: if not bool(re.match(r"(\w+\.)*(?P<cls>\w+)\.(?P=cls)", s[0])): _str += "void " if s[0].startswith("cv."): _str += s[0][3:].replace(".", "::") else: _str += s[0].replace(".", "::") if len(s[3]) == 0: _str += "()" else: _str += "( " for idx, arg in enumerate(s[3]): if idx > 0: _str += ", " argtype = re.sub(r"\bcv::", "", arg[0]) argtype = re.sub(r"\s+(\*|&)$", "\\1", arg[0]) bidx = argtype.find('[') if bidx < 0: _str += argtype else: _str += argtype[:bidx] _str += " " if arg[1]: _str += arg[1] else: _str += "arg" + str(idx) if bidx >= 0: _str += argtype[bidx:] if arg[2]: _str += "=" + re.sub(r"\bcv::", "", arg[2]) _str += " )" if "/C" in s[2]: _str += " const" if "/A" in s[2]: _str += " = 0" return _str def logerror(code, message, doc = None): if code in errors_disabled: return if doc: print doc["file"] + ":" + str(doc["line"]), print "error %03d: %s" % (code, message) #print def process_module(module, path): hppparser = hp.CppHeaderParser() rstparser = rp.RstParser(hppparser) rstparser.parse(module, path) rst = rstparser.definitions hdrlist = [] for root, dirs, files in os.walk(os.path.join(path, "include")): for filename in fnmatch.filter(files, "*.h*"): hdrlist.append(os.path.join(root, filename)) if module == "cuda": hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "cuda_types.hpp")) hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "cuda.hpp")) hdrlist.append(os.path.join(path, "..", "core", "include", "opencv2", "core", "cuda_stream_accessor.hpp")) decls = [] for hname in hdrlist: if not "ts_gtest.h" in hname: decls += hppparser.parse(hname, wmode=False) funcs = [] # not really needed to hardcode all the namespaces. Normally all they are collected automatically namespaces = ['cv', 'cv.cuda', 'cvflann', 'cvflann.anyimpl', 'cvflann.lsh', 'cv.flann', 'cv.linemod', 'cv.detail', 'cvtest', 'perf', 'cv.videostab'] classes = [] structs = [] # collect namespaces and classes/structs for decl in decls: if decl[0].startswith("const"): pass elif decl[0].startswith("class") or decl[0].startswith("struct"): if decl[0][0] == 'c': classes.append(decl) else: structs.append(decl) dotIdx = decl[0].rfind('.') if dotIdx > 0: namespace = decl[0][decl[0].find(' ')+1:dotIdx] if not [c for c in classes if c[0].endswith(namespace)] and not [s for s in structs if s[0].endswith(namespace)]: if namespace not in namespaces: namespaces.append(namespace) else: funcs.append(decl) clsnamespaces = [] # process classes for cl in classes: name = cl[0][cl[0].find(' ')+1:] if name.find('.') < 0 and not name.startswith("Cv"): logerror(ERROR_004_MISSEDNAMESPACE, "class " + name + " from opencv_" + module + " is placed in global namespace but violates C-style naming convention") clsnamespaces.append(name) if do_python_crosscheck and not name.startswith("cv.") and name.startswith("Cv"): clsnamespaces.append("cv." + name[2:]) if name.startswith("cv."): name = name[3:] name = name.replace(".", "::") sns = synonims.get(name, []) sns.append(name) for name in sns: doc = rst.get(name) if not doc: #TODO: class is not documented continue doc[DOCUMENTED_MARKER] = True # verify class marker if not doc.get("isclass"): logerror(ERROR_001_NOTACLASS, "class " + name + " is not marked as \"class\" in documentation", doc) else: # verify base signature = doc.get("class", "") signature = signature.replace(" public ", " ") namespaceIdx = signature.rfind("::") signature = ("class " + signature).strip() hdrsignature = ("class " + name + " " + cl[1]).replace(".", "::").replace("cv::","").strip() if signature != hdrsignature: logerror(ERROR_003_INCORRECTBASE, "invalid base class documentation\ndocumented: " + signature + "\nactual: " + hdrsignature, doc) # process structs for st in structs: name = st[0][st[0].find(' ')+1:] if name.find('.') < 0 and not name.startswith("Cv"): logerror(ERROR_004_MISSEDNAMESPACE, "struct " + name + " from opencv_" + module + " is placed in global namespace but violates C-style naming convention") clsnamespaces.append(name) if name.startswith("cv."): name = name[3:] name = name.replace(".", "::") doc = rst.get(name) if not doc: #TODO: struct is not documented continue doc[DOCUMENTED_MARKER] = True # verify struct marker if not doc.get("isstruct"): logerror(ERROR_002_NOTASTRUCT, "struct " + name + " is not marked as \"struct\" in documentation", doc) else: # verify base signature = doc.get("class", "") signature = signature.replace(", public ", " ").replace(" public ", " ") signature = signature.replace(", protected ", " ").replace(" protected ", " ") signature = signature.replace(", private ", " ").replace(" private ", " ") signature = ("struct " + signature).strip() hdrsignature = (st[0] + " " + st[1]).replace("struct cv.", "struct ").replace(".", "::").strip() if signature != hdrsignature: logerror(ERROR_003_INCORRECTBASE, "invalid base struct documentation\ndocumented: " + signature + "\nactual: " + hdrsignature, doc) print st, doc # process functions and methods flookup = {} for fn in funcs: name = fn[0] parent = None namespace = None for cl in clsnamespaces: if name.startswith(cl + "."): if cl.startswith(parent or ""): parent = cl if parent: name = name[len(parent) + 1:] for nm in namespaces: if parent.startswith(nm + "."): if nm.startswith(namespace or ""): namespace = nm if namespace: parent = parent[len(namespace) + 1:] else: for nm in namespaces: if name.startswith(nm + "."): if nm.startswith(namespace or ""): namespace = nm if namespace: name = name[len(namespace) + 1:] #print namespace, parent, name, fn[0] if not namespace and not parent and not name.startswith("cv") and not name.startswith("icv") and not name.startswith("CV_"): logerror(ERROR_004_MISSEDNAMESPACE, "function " + name + " from opencv_" + module + " is placed in global namespace but violates C-style naming convention") else: fdescr = (namespace, parent, name, fn) flookup_entry = flookup.get(fn[0], []) flookup_entry.append(fdescr) flookup[fn[0]] = flookup_entry if do_python_crosscheck: pyclsnamespaces = ["cv." + x[3:].replace(".", "_") for x in clsnamespaces] for name, doc in rst.iteritems(): decls = doc.get("decls") if not decls: continue for signature in decls: if signature[0] == "Python1": pname = signature[1][:signature[1].find('(')] try: fn = getattr(cv2.cv, pname[3:]) docstr = "cv." + fn.__doc__ except AttributeError: logerror(ERROR_005_MISSINGPYFUNC, "could not load documented function: cv2." + pname, doc) continue docstring = docstr sign = signature[1] signature.append(DOCUMENTED_MARKER) # convert old signature to pydoc style if docstring.endswith("*"): docstring = docstring[:-1] s = None while s != sign: s = sign sign = re.sub(r"^(.*\(.*)\(.*?\)(.*\) *->)", "\\1_\\2", sign) s = None while s != sign: s = sign sign = re.sub(r"\s*,\s*([^,]+)\s*=\s*[^,]+\s*(( \[.*\])?)\)", " [, \\1\\2])", sign) sign = re.sub(r"\(\s*([^,]+)\s*=\s*[^,]+\s*(( \[.*\])?)\)", "([\\1\\2])", sign) sign = re.sub(r"\)\s*->\s*", ") -> ", sign) sign = sign.replace("-> convexHull", "-> CvSeq") sign = sign.replace("-> lines", "-> CvSeq") sign = sign.replace("-> boundingRects", "-> CvSeq") sign = sign.replace("-> contours", "-> CvSeq") sign = sign.replace("-> retval", "-> int") sign = sign.replace("-> detectedObjects", "-> CvSeqOfCvAvgComp") def retvalRplace(match): m = match.group(1) m = m.replace("CvScalar", "scalar") m = m.replace("CvMemStorage", "memstorage") m = m.replace("ROIplImage", "image") m = m.replace("IplImage", "image") m = m.replace("ROCvMat", "mat") m = m.replace("CvMat", "mat") m = m.replace("double", "float") m = m.replace("CvSubdiv2DPoint", "point") m = m.replace("CvBox2D", "Box2D") m = m.replace("IplConvKernel", "kernel") m = m.replace("CvHistogram", "hist") m = m.replace("CvSize", "width,height") m = m.replace("cvmatnd", "matND") m = m.replace("CvSeqOfCvConvexityDefect", "convexityDefects") mm = m.split(',') if len(mm) > 1: return "(" + ", ".join(mm) + ")" else: return m docstring = re.sub(r"(?<=-> )(.*)$", retvalRplace, docstring) docstring = docstring.replace("( [, ", "([") if sign != docstring: logerror(ERROR_006_INVALIDPYOLDDOC, "old-style documentation differs from pydoc\npydoc: " + docstring + "\nfixup: " + sign + "\ncvdoc: " + signature[1], doc) elif signature[0] == "Python2": pname = signature[1][4:signature[1].find('(')] cvname = "cv." + pname parent = None for cl in pyclsnamespaces: if cvname.startswith(cl + "."): if cl.startswith(parent or ""): parent = cl try: if parent: instance, clsname = get_cv2_object(parent) fn = getattr(instance, cvname[len(parent)+1:]) docstr = fn.__doc__ docprefix = "cv2." + clsname + "." else: fn = getattr(cv2, pname) docstr = fn.__doc__ docprefix = "cv2." except AttributeError: if parent: logerror(ERROR_005_MISSINGPYFUNC, "could not load documented member of " + parent + " class: cv2." + pname, doc) else: logerror(ERROR_005_MISSINGPYFUNC, "could not load documented function cv2." + pname, doc) signature.append(DOCUMENTED_MARKER) # stop subsequent errors continue docstrings = [docprefix + s.replace("([, ", "([") for s in docstr.split(" or ")] if not signature[1] in docstrings: pydocs = "\npydoc: ".join(docstrings) logerror(ERROR_007_INVALIDPYDOC, "documentation differs from pydoc\npydoc: " + pydocs + "\ncvdoc: " + signature[1], doc) signature.append(DOCUMENTED_MARKER) # verify C/C++ signatures for name, doc in rst.iteritems(): decls = doc.get("decls") if not decls: continue for signature in decls: if signature[0] == "C" or signature[0] == "C++": if "template" in (signature[2][1] or ""): # TODO find a way to validate templates signature.append(DOCUMENTED_MARKER) continue fd = flookup.get(signature[2][0]) if not fd: if signature[2][0].startswith("cv."): fd = flookup.get(signature[2][0][3:]) if not fd: continue else: signature[2][0] = signature[2][0][3:] if signature[0] == "C": ffd = [f for f in fd if not f[0] and not f[1]] # filter out C++ stuff if not ffd: if fd[0][1]: logerror(ERROR_008_CFUNCISNOTGLOBAL, "function " + fd[0][2] + " is documented as C function but is actually member of " + fd[0][1] + " class", doc) elif fd[0][0]: logerror(ERROR_008_CFUNCISNOTGLOBAL, "function " + fd[0][2] + " is documented as C function but is actually placed in " + fd[0][0] + " namespace", doc) fd = ffd error = None for f in fd: match, error = compareSignatures(signature[2], f[3]) if match: signature.append(DOCUMENTED_MARKER) break if signature[-1] != DOCUMENTED_MARKER: candidates = "\n\t".join([formatSignature(f[3]) for f in fd]) logerror(ERROR_009_OVERLOADNOTFOUND, signature[0] + " function " + signature[2][0].replace(".","::") + " is documented but misses in headers (" + error + ").\nDocumented as:\n\t" + signature[1] + "\nCandidates are:\n\t" + candidates, doc) signature.append(DOCUMENTED_MARKER) # to stop subsequent error on this function # verify that all signatures was found in the library headers for name, doc in rst.iteritems(): # if doc.get(DOCUMENTED_MARKER, False): # continue # this class/struct was found if not doc.get(DOCUMENTED_MARKER, False) and (doc.get("isclass", False) or doc.get("isstruct", False)): if name in doc_signatures_whitelist: continue logerror(ERROR_010_UNKNOWNCLASS, "class/struct " + name + " is mentioned in documentation but is not found in OpenCV headers", doc) for d in doc.get("decls", []): if d[-1] != DOCUMENTED_MARKER: if d[0] == "C" or d[0] =="C++" or (do_python_crosscheck and d[0].startswith("Python")): if d[0][0] == 'C': sname = d[2][0][3:].replace(".", "::") if sname in defines: #TODO: need to find a way to verify #define's continue else: sname = d[1][:d[1].find("(")] prefixes = [x for x in doc_signatures_whitelist if sname.startswith(x)] if prefixes: # TODO: member of template class continue logerror(ERROR_011_UNKNOWNFUNC, d[0] + " function " + sname + " is documented but is not found in OpenCV headers. It is documented as:\n\t" + d[1], doc) # end of process_module if __name__ == "__main__": if len(sys.argv) < 2: print "Usage:\n", os.path.basename(sys.argv[0]), " <module path>" exit(0) modules = sys.argv[1:] if modules[0] == "all": modules = allmodules for module in modules: selfpath = os.path.dirname(os.path.abspath(sys.argv[0])) module_path = os.path.join(selfpath, "..", "modules", module) if not os.path.isdir(module_path): print "Module \"" + module + "\" could not be found." exit(1) process_module(module, module_path)