Commit 7e698726 authored by Andrey Kamaev

Added new script for checking correctness of documentation

parent 9d73b5f5
import os, sys, glob, re
import hdr_parser as hp
import rst_parser as rp
# Turn off the RST parser's own warning and error output.
rp.show_warnings = False
rp.show_errors = False

# When True, documented Python signatures are compared with cv2 docstrings.
do_python_crosscheck = True
# Error codes that logerror() is asked not to report.
errors_disabled = [ERROR_004_MISSEDNAMESPACE]

if do_python_crosscheck:
    # cv2 is optional: if it cannot be imported the Python cross-check is
    # switched off and the remaining checks still run.
    try:
        import cv2
    except ImportError:
        # Single-argument parenthesised form prints the same text under
        # Python 2 and Python 3.
        print("Could not load cv2")
        do_python_crosscheck = False
def get_cv2_object(name):
    """Create an instance of the cv2 class called *name*.

    A leading "cv2." and then a leading "cv." are removed from *name* first.
    A few classes are built through dedicated cv2 factory calls (listed in
    the table below); any other class is looked up on the cv2 module and
    called without arguments.

    Returns a tuple ``(instance, stripped_name)``. An unknown name raises
    AttributeError from the cv2 attribute lookup.
    """
    for prefix in ("cv2.", "cv."):
        if name.startswith(prefix):
            name = name[len(prefix):]

    # Classes that are not constructed by calling cv2.<name>() directly.
    special_factories = {
        "Algorithm": lambda: cv2.Algorithm__create("Feature2D.ORB"),
        "FeatureDetector": lambda: cv2.FeatureDetector_create("ORB"),
        "DescriptorExtractor": lambda: cv2.DescriptorExtractor_create("ORB"),
        "BackgroundSubtractor": lambda: cv2.BackgroundSubtractorMOG(),
        "StatModel": lambda: cv2.KNearest(),
    }

    make_instance = special_factories.get(name)
    if make_instance is None:
        make_instance = getattr(cv2, name)
    return make_instance(), name
def compareSignatures(f, s):
    """Compare a documented signature *f* with a header signature *s*.

    Both arguments are declaration lists laid out as
    ``[name, return_type, modifiers, args]``; every element of ``args`` is
    itself a list ``[type, name, default_value, ...]``.

    Returns ``(True, "match")`` when the two declarations agree, otherwise
    ``(False, reason)`` where *reason* names the first difference found.
    The checks run in this order: name, return type, const qualifier,
    number of arguments, then type / name / default value of each argument.
    """
    cv_prefix = r"\bcv::"

    # function names
    if f[0] != s[0]:
        return False, "name mismatch"

    # return type: a missing header return type counts as "void"; a missing
    # documented return type is not checked at all
    stype = s[1] or "void"
    ftype = f[1]
    if stype.startswith("cv::"):
        stype = stype[4:]
    if ftype and ftype.startswith("cv::"):
        ftype = ftype[4:]
    if ftype and ftype != stype:
        return False, "return type mismatch"

    # const qualifier: the marker is "/C", the same one formatSignature()
    # looks for in the modifier list. The previous "\C" (backslash) could
    # never be found there, so a const mismatch was silently accepted.
    if ("/C" in f[2]) != ("/C" in s[2]):
        return False, "const qulifier mismatch"

    if len(f[3]) != len(s[3]):
        return False, "different number of arguments"

    for idx, (farg, sarg) in enumerate(zip(f[3], s[3])):
        number = str(idx + 1)

        ftype = re.sub(cv_prefix, "", farg[0] or "")
        stype = re.sub(cv_prefix, "", sarg[0] or "")
        if ftype != stype:
            return False, "type of argument #" + number + " mismatch"

        # unnamed arguments are compared under the placeholder "arg<idx>"
        fname = farg[1] or "arg" + str(idx)
        sname = sarg[1] or "arg" + str(idx)
        if fname != sname:
            return False, "name of argument #" + number + " mismatch"

        fdef = re.sub(cv_prefix, "", farg[2] or "")
        sdef = re.sub(cv_prefix, "", sarg[2] or "")
        if fdef != sdef:
            return False, "default value of argument #" + number + " mismatch"

    return True, "match"
def formatSignature(s):
    """Render a parsed declaration as a C++-like signature string.

    *s* is a declaration list ``[name, return_type, modifiers, args]`` whose
    ``args`` elements are lists ``[type, name, default_value, ...]``.

    The result is built from:
      * the return type if there is one; otherwise "void", unless the name
        has the form ``Cls.Cls`` (optionally prefixed with ``cv.``);
      * the dotted name with a leading ``cv.`` removed and ``.`` turned
        into ``::``;
      * ``()`` for an empty argument list, else ``( type name=default, ... )``
        with ``cv::`` removed from types and defaults and unnamed arguments
        shown as ``arg<index>``;
      * a trailing `` const`` when the modifiers contain ``"/C"``.
    """
    name, rettype, modifiers, args = s[0], s[1], s[2], s[3]
    pieces = []

    if rettype:
        pieces.append(rettype + " ")
    elif not re.match(r"(cv\.)?(?P<cls>\w+)\.(?P=cls)", name):
        # no return type and not a Cls.Cls name: show it as void
        pieces.append("void ")

    # the name is emitted exactly once, without the leading "cv." namespace
    if name.startswith("cv."):
        pieces.append(name[3:].replace(".", "::"))
    else:
        pieces.append(name.replace(".", "::"))

    if len(args) == 0:
        pieces.append("()")
    else:
        rendered = []
        for idx, arg in enumerate(args):
            text = re.sub(r"\bcv::", "", arg[0]) + " "
            text += arg[1] if arg[1] else "arg" + str(idx)
            if arg[2]:
                text += "=" + re.sub(r"\bcv::", "", arg[2])
            rendered.append(text)
        pieces.append("( " + ", ".join(rendered) + " )")

    if "/C" in modifiers:
        pieces.append(" const")
    return "".join(pieces)
def logerror(code, message, doc = None):
    """Print one numbered error line unless its code is disabled.

    code    -- integer error code; printed zero-padded to three digits.
    message -- text of the error.
    doc     -- optional documentation entry; when given, its "file" and
               "line" values are printed in front of the error as
               ``file:line``.

    Nothing is printed when *code* is listed in the module-level
    ``errors_disabled``.
    """
    if code in errors_disabled:
        return

    location = ""
    if doc:
        location = doc["file"] + ":" + str(doc["line"]) + " "
    # One print call with one string yields the same output line as the
    # two-step Python 2 print (trailing comma) and is also valid Python 3.
    print(location + "error %03d: %s" % (code, message))
def process_module(module, path):
# Cross-check the documentation of one module against its headers.
#   module -- module name; used in the header path and in error messages
#   path   -- module directory; headers are searched under <path>/include/opencv2/<module>
# Problems are reported through logerror(); nothing is returned by the visible code.
# NOTE(review): this copy of the function has lost its indentation and several
# statements (marked below) look truncated or missing -- confirm against the original file.
hppparser = hp.CppHeaderParser()
rstparser = rp.RstParser(hppparser)
rstparser.parse(module, path)
# documented entities, looked up by name below
rst = rstparser.definitions
# public headers of the module plus its "detail" sub-directory
hdrlist = glob.glob(os.path.join(path, "include", "opencv2", module, "*.h*"))
hdrlist.extend(glob.glob(os.path.join(path, "include", "opencv2", module, "detail", "*.h*")))
decls = []
# parse every header except ts_gtest.h, passing wmode=False
for hname in hdrlist:
if not "ts_gtest.h" in hname:
decls += hppparser.parse(hname, wmode=False)
funcs = []
# Hardcoding all the namespaces is not strictly needed; normally they are all collected automatically.
namespaces = ['cv', 'cv.gpu', 'cvflann', 'cvflann.anyimpl', 'cvflann.lsh', 'cv.flann', 'cv.linemod', 'cv.detail', 'cvtest', 'perf', 'cv.videostab']
classes = []
structs = []
# collect namespaces and classes/structs
# NOTE(review): several branch bodies in this loop are absent in this copy (the
# `if`/`elif` headers below have no statements). Presumably they fill `classes`,
# `structs`, `funcs` and `namespaces` -- verify against the original file.
for decl in decls:
if decl[0].startswith("const"):
elif decl[0].startswith("class") or decl[0].startswith("struct"):
if decl[0][0] == 'c':
dotIdx = decl[0].rfind('.')
if dotIdx > 0:
# text between the "class "/"struct " keyword and the last dot
namespace = decl[0][decl[0].find(' ')+1:dotIdx]
# only consider it when no known class/struct declaration ends with that text
if not [c for c in classes if c[0].endswith(namespace)] and not [s for s in structs if s[0].endswith(namespace)]:
if namespace not in namespaces:
clsnamespaces = []
# process classes
for cl in classes:
# drop the leading "class " keyword
name = cl[0][cl[0].find(' ')+1:]
if name.find('.') < 0 and not name.startswith("Cv"):
logerror(ERROR_004_MISSEDNAMESPACE, "class " + name + " from opencv_" + module + " is placed in global namespace but violates C-style naming convention")
# with the Python cross-check on, a global Cv* class is also registered as cv.<name without "Cv">
if do_python_crosscheck and not name.startswith("cv.") and name.startswith("Cv"):
clsnamespaces.append("cv." + name[2:])
# documentation keys have no "cv." prefix and use "::" as separator
if name.startswith("cv."):
name = name[3:]
name = name.replace(".", "::")
doc = rst.get(name)
# NOTE(review): the body of this `if` is missing here (probably a `continue`);
# as written `doc` may be None in the checks below.
if not doc:
#TODO: class is not documented
# verify class marker
if not doc.get("isclass"):
logerror(ERROR_001_NOTACLASS, "class " + name + " is not marked as \"class\" in documentation", doc)
# verify base
# access specifiers are removed so the documented base list can be compared as plain text
signature = doc.get("class", "")
signature = signature.replace(", public ", " ").replace(" public ", " ")
signature = signature.replace(", protected ", " ").replace(" protected ", " ")
signature = signature.replace(", private ", " ").replace(" private ", " ")
signature = ("class " + signature).strip()
hdrsignature = (cl[0] + " " + cl[1]).replace("class cv.", "class ").replace(".", "::").strip()
if signature != hdrsignature:
logerror(ERROR_003_INCORRECTBASE, "invalid base class documentation\ndocumented: " + signature + "\nactual: " + hdrsignature, doc)
# process structs
# same checks as for classes, using the "isstruct" marker
for st in structs:
name = st[0][st[0].find(' ')+1:]
if name.find('.') < 0 and not name.startswith("Cv"):
logerror(ERROR_004_MISSEDNAMESPACE, "struct " + name + " from opencv_" + module + " is placed in global namespace but violates C-style naming convention")
if name.startswith("cv."):
name = name[3:]
name = name.replace(".", "::")
doc = rst.get(name)
# NOTE(review): body of this `if` is missing here as well (probably a `continue`)
if not doc:
#TODO: struct is not documented
# verify struct marker
if not doc.get("isstruct"):
logerror(ERROR_002_NOTASTRUCT, "struct " + name + " is not marked as \"struct\" in documentation", doc)
# verify base
signature = doc.get("class", "")
signature = signature.replace(", public ", " ").replace(" public ", " ")
signature = signature.replace(", protected ", " ").replace(" protected ", " ")
signature = signature.replace(", private ", " ").replace(" private ", " ")
signature = ("struct " + signature).strip()
hdrsignature = (st[0] + " " + st[1]).replace("struct cv.", "struct ").replace(".", "::").strip()
if signature != hdrsignature:
logerror(ERROR_003_INCORRECTBASE, "invalid base struct documentation\ndocumented: " + signature + "\nactual: " + hdrsignature, doc)
# process functions and methods
# each function name is split into (namespace, parent class, short name)
flookup = {}
for fn in funcs:
name = fn[0]
parent = None
namespace = None
# find the enclosing class: a candidate replaces the current choice only if it starts with it
for cl in clsnamespaces:
if name.startswith(cl + "."):
if cl.startswith(parent or ""):
parent = cl
if parent:
name = name[len(parent) + 1:]
# the namespace is then taken from the class name
for nm in namespaces:
if parent.startswith(nm + "."):
if nm.startswith(namespace or ""):
namespace = nm
if namespace:
parent = parent[len(namespace) + 1:]
# NOTE(review): an `else:` probably preceded this loop (free functions); as written it would also run for methods -- confirm
for nm in namespaces:
if name.startswith(nm + "."):
if nm.startswith(namespace or ""):
namespace = nm
if namespace:
name = name[len(namespace) + 1:]
#print namespace, parent, name, fn[0]
if not namespace and not parent and not name.startswith("cv") and not name.startswith("CV_"):
logerror(ERROR_004_MISSEDNAMESPACE, "function " + name + " from opencv_" + module + " is placed in global namespace but violates C-style naming convention")
fdescr = (namespace, parent, name, fn)
# NOTE(review): fdescr is never added to flookup_entry in this copy -- an append seems to be missing
flookup_entry = flookup.get(fn[0], [])
flookup[fn[0]] = flookup_entry
# compare documented Python signatures with the docstrings exposed by cv2
if do_python_crosscheck:
for name, doc in rst.iteritems():
decls = doc.get("decls")
# NOTE(review): body of this `if` is missing (probably a `continue`)
if not decls:
for signature in decls:
if signature[0] == "Python1":
# name part of the documented signature, up to the opening parenthesis
pname = signature[1][:signature[1].find('(')]
# NOTE(review): the first getattr argument and the `try:` line that matches
# the `except` below are missing in this copy
fn = getattr(, pname[3:])
docstr = "cv." + fn.__doc__
except AttributeError:
logerror(ERROR_005_MISSINGPYFUNC, "could not load documented function: cv2." + pname, doc)
docstring = docstr
sign = signature[1]
# convert old signature to pydoc style
if docstring.endswith("*"):
docstring = docstring[:-1]
# repeat until stable: replace a parenthesised group inside the argument list with "_"
s = None
while s != sign:
s = sign
sign = re.sub(r"^(.*\(.*)\(.*?\)(.*\) *->)", "\\1_\\2", sign)
# repeat until stable: turn a trailing "name=default" argument into the optional "[, name]" form
s = None
while s != sign:
s = sign
sign = re.sub(r"\s*,\s*([^,]+)\s*=\s*[^,]+\s*(( \[.*\])?)\)", " [, \\1\\2])", sign)
sign = re.sub(r"\(\s*([^,]+)\s*=\s*[^,]+\s*(( \[.*\])?)\)", "([\\1\\2])", sign)
sign = re.sub(r"\)\s*->\s*", ") -> ", sign)
# fixed replacements of documented return value names
sign = sign.replace("-> convexHull", "-> CvSeq")
sign = sign.replace("-> lines", "-> CvSeq")
sign = sign.replace("-> boundingRects", "-> CvSeq")
sign = sign.replace("-> contours", "-> CvSeq")
sign = sign.replace("-> retval", "-> int")
sign = sign.replace("-> detectedObjects", "-> CvSeqOfCvAvgComp")
# callback for re.sub below: rewrites type names found after "-> " in the pydoc string
def retvalRplace(match):
# NOTE(review): the right-hand side of this assignment is missing in this copy
m =
m = m.replace("CvScalar", "scalar")
m = m.replace("CvMemStorage", "memstorage")
m = m.replace("ROIplImage", "image")
m = m.replace("IplImage", "image")
m = m.replace("ROCvMat", "mat")
m = m.replace("CvMat", "mat")
m = m.replace("double", "float")
m = m.replace("CvSubdiv2DPoint", "point")
m = m.replace("CvBox2D", "Box2D")
m = m.replace("IplConvKernel", "kernel")
m = m.replace("CvHistogram", "hist")
m = m.replace("CvSize", "width,height")
m = m.replace("cvmatnd", "matND")
m = m.replace("CvSeqOfCvConvexityDefect", "convexityDefects")
# several comma-separated values are rendered as a parenthesised tuple
mm = m.split(',')
if len(mm) > 1:
return "(" + ", ".join(mm) + ")"
return m
docstring = re.sub(r"(?<=-> )(.*)$", retvalRplace, docstring)
docstring = docstring.replace("( [, ", "([")
if sign != docstring:
logerror(ERROR_006_INVALIDPYOLDDOC, "old-style documentation differs from pydoc\npydoc: " + docstring + "\nfixup: " + sign + "\ncvdoc: " + signature[1], doc)
elif signature[0] == "Python2":
# skip the first four characters of the documented signature, keep the name up to "("
pname = signature[1][4:signature[1].find('(')]
cvname = "cv." + pname
parent = None
for cl in clsnamespaces:
if cvname.startswith(cl + "."):
if cl.startswith(parent or ""):
parent = cl
# NOTE(review): the `try:` and `else:` lines around the two lookups below seem to be
# missing; presumably the member lookup is used when `parent` is set and the
# module-level lookup otherwise -- confirm
if parent:
instance, clsname = get_cv2_object(parent)
fn = getattr(instance, cvname[len(parent)+1:])
docstr = fn.__doc__
docprefix = "cv2." + clsname + "."
fn = getattr(cv2, pname)
docstr = fn.__doc__
docprefix = "cv2."
except AttributeError:
if parent:
logerror(ERROR_005_MISSINGPYFUNC, "could not load documented member of " + parent + " class: cv2." + pname, doc)
logerror(ERROR_005_MISSINGPYFUNC, "could not load documented function cv2." + pname, doc)
# the docstring is split on " or " into alternative signatures
docstrings = [docprefix + s.replace("([, ", "([") for s in docstr.split(" or ")]
if not signature[1] in docstrings:
pydocs = "\npydoc: ".join(docstrings)
logerror(ERROR_007_INVALIDPYDOC, "documentation differs from pydoc\npydoc: " + pydocs + "\ncvdoc: " + signature[1], doc)
#build dictionary for functions lookup
# verify C/C++ signatures
for name, doc in rst.iteritems():
decls = doc.get("decls")
# NOTE(review): body of this `if` is missing (probably a `continue`)
if not decls:
for signature in decls:
if signature[0] == "C" or signature[0] == "C++":
# look the documented name up as is, then without a leading "cv."
# NOTE(review): the statements that follow the two `if not fd:` tests look incomplete in this copy
fd = flookup.get(signature[2][0])
if not fd:
if signature[2][0].startswith("cv."):
fd = flookup.get(signature[2][0][3:])
if not fd:
signature[2][0] = signature[2][0][3:]
if signature[0] == "C":
ffd = [f for f in fd if not f[0] and not f[1]] # filter out C++ stuff
if not ffd:
if fd[0][1]:
logerror(ERROR_008_CFUNCISNOTGLOBAL, "function " + fd[0][2] + " is documented as C function but is actually member of " + fd[0][1] + " class", doc)
elif fd[0][0]:
logerror(ERROR_008_CFUNCISNOTGLOBAL, "function " + fd[0][2] + " is documented as C function but is actually placed in " + fd[0][0] + " namespace", doc)
fd = ffd
error = None
# compare the documented declaration with every header candidate
for f in fd:
match, error = compareSignatures(signature[2], f[3])
# NOTE(review): the body of `if match:` (and whatever sets DOCUMENTED_MARKER on the
# signature) is not visible here -- verify against the original file
if match:
if signature[-1] != DOCUMENTED_MARKER:
candidates = "\n\t".join([formatSignature(f[3]) for f in fd])
logerror(ERROR_009_OVERLOADNOTFOUND, signature[0] + " function " + signature[2][0].replace(".","::") + " is documented but misses in headers (" + error + ").\nDocumented as:\n\t" + signature[1] + "\nCandidates are:\n\t" + candidates, doc)
#print hdrlist
#for d in decls:
# print d
#print rstparser.definitions
if __name__ == "__main__":
    # Without module names there is nothing to check: only the usage text is
    # shown (the loop below then iterates over an empty list).
    if len(sys.argv) < 2:
        # Same output as the Python 2 statement
        #   print "Usage:\n", <script name>, " <module path>"
        # written as one string so it is valid on Python 2 and 3.
        print("Usage:\n" + os.path.basename(sys.argv[0]) + "  <module path>")

    # Modules are resolved relative to this script:
    # <script dir>/../modules/<module>. The script directory does not change
    # between iterations, so it is computed once.
    selfpath = os.path.dirname(os.path.abspath(sys.argv[0]))
    for module in sys.argv[1:]:
        module_path = os.path.join(selfpath, "..", "modules", module)
        if not os.path.isdir(module_path):
            print("Module \"" + module + "\" could not be found.")
            # Do not hand a non-existent directory to process_module().
            continue
        process_module(module, module_path)
...@@ -3,6 +3,7 @@ allmodules = ["core", "flann", "imgproc", "ml", "highgui", "video", "features2d" ...@@ -3,6 +3,7 @@ allmodules = ["core", "flann", "imgproc", "ml", "highgui", "video", "features2d"
verbose = False verbose = False
show_warnings = True show_warnings = True
show_errors = True show_errors = True
show_critical_errors = True
params_blacklist = { params_blacklist = {
"fromarray" : ("object", "allowND"), # python only function "fromarray" : ("object", "allowND"), # python only function
...@@ -271,8 +272,8 @@ class RstParser(object): ...@@ -271,8 +272,8 @@ class RstParser(object):
# endfor l in lines # endfor l in lines
if fdecl.balance != 0: if fdecl.balance != 0:
if show_errors: if show_critical_errors:
print >> sys.stderr, "RST parser error: invalid parentheses balance in \"%s\" File: %s (line %s)" % (section_name, file_name, lineno) print >> sys.stderr, "RST parser error: invalid parentheses balance in \"%s\" File: %s:%s" % (section_name, file_name, lineno)
return return
# save last parameter if needed # save last parameter if needed
...@@ -346,9 +347,9 @@ class RstParser(object): ...@@ -346,9 +347,9 @@ class RstParser(object):
decls = func.get("decls",[]) decls = func.get("decls",[])
if (decl.lang == "C++" or decl.lang == "C"): if (decl.lang == "C++" or decl.lang == "C"):
rst_decl = self.cpp_parser.parse_func_decl_no_wrap(decl.fdecl) rst_decl = self.cpp_parser.parse_func_decl_no_wrap(decl.fdecl)
decls.append( (decl.lang, decl.fdecl, rst_decl) ) decls.append( [decl.lang, decl.fdecl, rst_decl] )
else: else:
decls.append( (decl.lang, decl.fdecl) ) decls.append( [decl.lang, decl.fdecl] )
func["decls"] = decls func["decls"] = decls
def add_new_pdecl(self, func, decl): def add_new_pdecl(self, func, decl):
...@@ -242,7 +242,7 @@ class CppHeaderParser(object): ...@@ -242,7 +242,7 @@ class CppHeaderParser(object):
bases = ll[2:] bases = ll[2:]
return classname, bases, modlist return classname, bases, modlist
def parse_func_decl_no_wrap(self, decl_str): def parse_func_decl_no_wrap(self, decl_str, static_method = False):
fdecl = decl_str.replace("CV_OUT", "").replace("CV_IN_OUT", "") fdecl = decl_str.replace("CV_OUT", "").replace("CV_IN_OUT", "")
fdecl = fdecl.strip().replace("\t", " ") fdecl = fdecl.strip().replace("\t", " ")
while " " in fdecl: while " " in fdecl:
...@@ -273,9 +273,16 @@ class CppHeaderParser(object): ...@@ -273,9 +273,16 @@ class CppHeaderParser(object):
fname = "cv." + fname.replace("::", ".") fname = "cv." + fname.replace("::", ".")
decl = [fname, rettype, [], []] decl = [fname, rettype, [], []]
# inline constructor implementation
implmatch = re.match(r"(\(.*?\))\s*:\s*(\w+\(.*?\),?\s*)+", fdecl[apos:])
if bool(implmatch):
fdecl = fdecl[:apos] +
args0str = fdecl[apos+1:fdecl.rfind(")")].strip() args0str = fdecl[apos+1:fdecl.rfind(")")].strip()
if args0str != "": if args0str != "" and args0str != "void":
args0str = re.sub(r"\([^)]*\)", lambda m:',', "@comma@"), args0str)
args0 = args0str.split(",") args0 = args0str.split(",")
args = [] args = []
...@@ -293,9 +300,19 @@ class CppHeaderParser(object): ...@@ -293,9 +300,19 @@ class CppHeaderParser(object):
defval = "" defval = ""
if dfpos >= 0: if dfpos >= 0:
defval = arg[dfpos+1:].strip() defval = arg[dfpos+1:].strip()
dfpos = arg.find("CV_DEFAULT")
if dfpos >= 0:
defval, pos3 = self.get_macro_arg(arg, dfpos)
dfpos = arg.find("CV_WRAP_DEFAULT")
if dfpos >= 0:
defval, pos3 = self.get_macro_arg(arg, dfpos)
if dfpos >= 0:
defval = defval.replace("@comma@", ",")
arg = arg[:dfpos].strip() arg = arg[:dfpos].strip()
pos = len(arg)-1 pos = len(arg)-1
while pos >= 0 and (arg[pos] == "_" or arg[pos].isalpha() or arg[pos].isdigit()): while pos >= 0 and (arg[pos] in "_[]" or arg[pos].isalpha() or arg[pos].isdigit()):
pos -= 1 pos -= 1
if pos >= 0: if pos >= 0:
aname = arg[pos+1:].strip() aname = arg[pos+1:].strip()
...@@ -308,6 +325,10 @@ class CppHeaderParser(object): ...@@ -308,6 +325,10 @@ class CppHeaderParser(object):
aname = "param" aname = "param"
decl[3].append([atype, aname, defval, []]) decl[3].append([atype, aname, defval, []])
if static_method:
if decl_str.endswith("const"):
return decl return decl
def parse_func_decl(self, decl_str): def parse_func_decl(self, decl_str):
...@@ -328,7 +349,7 @@ class CppHeaderParser(object): ...@@ -328,7 +349,7 @@ class CppHeaderParser(object):
return [] return []
# ignore old API in the documentation check (for now) # ignore old API in the documentation check (for now)
if "CVAPI(" in decl_str: if "CVAPI(" in decl_str and self.wrap_mode:
return [] return []
top = self.block_stack[-1] top = self.block_stack[-1]
...@@ -378,6 +399,10 @@ class CppHeaderParser(object): ...@@ -378,6 +399,10 @@ class CppHeaderParser(object):
sys.exit(-1) sys.exit(-1)
decl_start = decl_str[:args_begin].strip() decl_start = decl_str[:args_begin].strip()
# constructor/destructor case
if bool(re.match(r'(\w+::)*(?P<x>\w+)::~?(?P=x)', decl_start)):
decl_start = "void " + decl_start
rettype, funcname, modlist, argno = self.parse_arg(decl_start, -1) rettype, funcname, modlist, argno = self.parse_arg(decl_start, -1)
if argno >= 0: if argno >= 0:
...@@ -385,7 +410,15 @@ class CppHeaderParser(object): ...@@ -385,7 +410,15 @@ class CppHeaderParser(object):
if rettype == classname or rettype == "~" + classname: if rettype == classname or rettype == "~" + classname:
rettype, funcname = "", rettype rettype, funcname = "", rettype
else: else:
print "Error at %d. the function/method name is missing: '%s'" % (self.lineno, decl_start) if bool(re.match('\w+\s+\(\*\w+\)\s*\(.*\)', decl_str)):
return [] # function typedef
elif bool(re.match('[A-Z_]+', decl_start)):
return [] # it seems to be a macro instantiation
elif "__declspec" == decl_start:
return []
#print rettype, funcname, modlist, argno
print "Error at %d in %s. the function/method name is missing: '%s'" % (self.lineno, self.hname, decl_start)
sys.exit(-1) sys.exit(-1)
if self.wrap_mode and (("::" in funcname) or funcname.startswith("~")): if self.wrap_mode and (("::" in funcname) or funcname.startswith("~")):
...@@ -399,7 +432,7 @@ class CppHeaderParser(object): ...@@ -399,7 +432,7 @@ class CppHeaderParser(object):
funcname = self.get_dotted_name(funcname) funcname = self.get_dotted_name(funcname)
if not self.wrap_mode: if not self.wrap_mode:
decl = self.parse_func_decl_no_wrap(decl_str) decl = self.parse_func_decl_no_wrap(decl_str, static_method)
decl[0] = funcname decl[0] = funcname
return decl return decl
...@@ -515,7 +548,7 @@ class CppHeaderParser(object): ...@@ -515,7 +548,7 @@ class CppHeaderParser(object):
sys.exit(-1) sys.exit(-1)
if block_name: if block_name:
n += block_name + "." n += block_name + "."
return n + name return n + name.replace("::", ".")
def parse_stmt(self, stmt, end_token): def parse_stmt(self, stmt, end_token):
""" """
...@@ -559,7 +592,7 @@ class CppHeaderParser(object): ...@@ -559,7 +592,7 @@ class CppHeaderParser(object):
stmt_type = stmt.split()[0] stmt_type = stmt.split()[0]
classname, bases, modlist = self.parse_class_decl(stmt) classname, bases, modlist = self.parse_class_decl(stmt)
decl = [] decl = []
if ("CV_EXPORTS_W" in stmt) or ("CV_EXPORTS_AS" in stmt) or (not self.wrap_mode and ("CV_EXPORTS" in stmt)): if ("CV_EXPORTS_W" in stmt) or ("CV_EXPORTS_AS" in stmt) or (not self.wrap_mode):# and ("CV_EXPORTS" in stmt)):
decl = [stmt_type + " " + self.get_dotted_name(classname), "", modlist, []] decl = [stmt_type + " " + self.get_dotted_name(classname), "", modlist, []]
if bases: if bases:
decl[1] = ": " + " ".join(bases) decl[1] = ": " + " ".join(bases)
...@@ -570,6 +603,8 @@ class CppHeaderParser(object): ...@@ -570,6 +603,8 @@ class CppHeaderParser(object):
if stmt.startswith("namespace"): if stmt.startswith("namespace"):
stmt_list = stmt.split() stmt_list = stmt.split()
if len(stmt_list) < 2:
return stmt_list[0], stmt_list[1], True, None return stmt_list[0], stmt_list[1], True, None
if stmt.startswith("extern") and "\"C\"" in stmt: if stmt.startswith("extern") and "\"C\"" in stmt:
return "namespace", "", True, None return "namespace", "", True, None
...@@ -633,6 +668,7 @@ class CppHeaderParser(object): ...@@ -633,6 +668,7 @@ class CppHeaderParser(object):
The main method. Parses the input file. The main method. Parses the input file.
Returns the list of declarations (that can be print using print_decls) Returns the list of declarations (that can be print using print_decls)
""" """
self.hname = hname
decls = [] decls = []
f = open(hname, "rt") f = open(hname, "rt")
linelist = list(f.readlines()) linelist = list(f.readlines())
