This program calculates file issues for the specified packages, using data retrieved from a SonarQube server.
Original imperative program
from __future__ import division
import urllib2, json
# Host and port of the SonarQube server; spliced into every API URL built below.
SERV_ADDR = '10.0.2.74:9000'
# Rule key passed as the "rules" filter when counting file-length issues.
FILE_LENGTH_ISSUE_KEY = 'checkstyle:com.puppycrawl.tools.checkstyle.checks.sizes.FileLengthCheck'
# Rule keys whose issue counts are divided by the number of methods
# (see the functions that iterate over this list).
METHOD_RELATED_ISSUE_KEYS = ['checkstyle:com.puppycrawl.tools.checkstyle.checks.sizes.MethodLengthCheck',
'checkstyle:com.puppycrawl.tools.checkstyle.checks.sizes.ParameterNumberCheck',
'checkstyle:com.puppycrawl.tools.checkstyle.checks.metrics.CyclomaticComplexityCheck',
'checkstyle:com.puppycrawl.tools.checkstyle.checks.coding.NestedIfDepthCheck',
'checkstyle:com.puppycrawl.tools.checkstyle.checks.duplicates.StrictDuplicateCodeCheck']
def get_data(url):
    """Fetch ``url`` and return the response body parsed as JSON.

    Args:
        url: Complete URL to request.

    Returns:
        The object produced by ``json.load`` for the response body.

    Raises:
        Any exception raised by ``urllib2.urlopen`` or ``json.load``;
        nothing is caught here.
    """
    conn = urllib2.urlopen(url)
    try:
        return json.load(conn)
    finally:
        # Always release the connection, even if the body is not valid JSON.
        conn.close()
def get_issue_num(proj_name, pkg_key, issue_key):
    """Count the issues returned for one rule within one package.

    Args:
        proj_name: Project part of the component root.
        pkg_key: Package part of the component root (joined as
            ``proj_name:pkg_key``).
        issue_key: Rule key used as the ``rules`` filter.

    Returns:
        The length of the ``issues`` list in the response. Only a single
        request with ``pageSize=5000`` is made, so the count is presumably
        capped by that page size -- TODO confirm against the server API.
    """
    query = 'http://%s/api/issues/search?componentRoots=%s:%s&pageSize=5000&rules=%s' % (
        SERV_ADDR, proj_name, pkg_key, issue_key)
    return len(get_data(query)['issues'])
def get_class_num(proj_name, pkg_key):
    """Return the ``classes`` metric value for the resource ``proj_name:pkg_key``.

    The value is read from the first measure (``msr``) of the first
    resource in the response.
    """
    resources = get_data(
        'http://%s/api/resources?resource=%s:%s&metrics=classes'
        % (SERV_ADDR, proj_name, pkg_key))
    first_measure = resources[0]['msr'][0]
    return first_measure['val']
def get_method_num(proj_name, pkg_key):
    """Return the ``functions`` metric value for the resource ``proj_name:pkg_key``.

    The value is read from the first measure (``msr``) of the first
    resource in the response.
    """
    request_args = (SERV_ADDR, proj_name, pkg_key)
    response = get_data(
        'http://%s/api/resources?resource=%s:%s&metrics=functions' % request_args)
    measures = response[0]['msr']
    return measures[0]['val']
def get_average_issue_ratio(proj_name, pkgs, issue_key, base_type_name):
    """Return total issues divided by total base count over all packages.

    Args:
        proj_name: Project name forwarded to the lookups.
        pkgs: Iterable of package keys.
        issue_key: Rule key whose issues are counted.
        base_type_name: Callable invoked as ``base_type_name(proj_name, pkg)``
            that returns the denominator contribution for one package
            (e.g. ``get_class_num`` or ``get_method_num``).

    Returns:
        ``sum(issues) / sum(base counts)``; true division relies on the
        file's ``from __future__ import division``.

    Raises:
        ZeroDivisionError: if the base counts add up to zero (including
            when ``pkgs`` is empty).
    """
    issue_count = 0
    base_count = 0
    for package in pkgs:
        # Query the issue count first, then the base count, per package.
        issue_count = issue_count + get_issue_num(proj_name, package, issue_key)
        base_count = base_count + base_type_name(proj_name, package)
    return issue_count / base_count
def file_length_check(proj_name, pkgs):
    """Return the file-length issue ratio over ``pkgs`` as a percentage string.

    The ratio is file-length issues per class, formatted with two decimal
    places by the ``'{:.2%}'`` format spec.
    """
    return '{:.2%}'.format(
        get_average_issue_ratio(
            proj_name, pkgs, FILE_LENGTH_ISSUE_KEY, get_class_num))
def method_related_checks(proj_name, pkgs):
    """Return a dict of per-method issue ratios for every method-related rule.

    Keys are the last dot-separated segment of each rule key in
    ``METHOD_RELATED_ISSUE_KEYS``; values are percentage strings with two
    decimal places.
    """
    report = {}
    for rule_key in METHOD_RELATED_ISSUE_KEYS:
        ratio = get_average_issue_ratio(
            proj_name, pkgs, rule_key, get_method_num)
        # Keep only the check's class name, e.g. "MethodLengthCheck".
        short_name = rule_key.rsplit('.', 1)[-1]
        report[short_name] = '{:.2%}'.format(ratio)
    return report
def get_test_lines(proj_name, pkg_key):
    """Return the pair ``(covered_lines, valid_lines)`` for one package.

    The body is elided ("...") in this listing, so how the two values are
    obtained is not shown here -- presumably from the server's coverage
    metrics; TODO confirm.
    """
    ...
    return covered_lines, valid_lines
def get_test_branches(proj_name, pkg_key):
    """Return the pair ``(covered_branches, valid_branches)`` for one package.

    The body is elided ("...") in this listing, so how the two values are
    obtained is not shown here -- presumably from the server's coverage
    metrics; TODO confirm.
    """
    ...
    return covered_branches, valid_branches
def get_average_coverage(proj_name, pkgs):
    """Return ``(average_line_coverage, average_branch_coverage)`` over ``pkgs``.

    Imperative version. Line coverage is the total of covered lines divided
    by the total of valid lines across all packages. The branch part is
    elided ("...") in this listing; ``total_covered_branches`` and
    ``total_valid_branches`` are presumably accumulated there the same way
    from ``get_test_branches`` -- TODO confirm.
    """
    # Collect one (covered_lines, valid_lines) pair per package.
    lines = []
    for pkg in pkgs:
        res = get_test_lines(proj_name, pkg)
        lines.append(res)
    # Add up both columns of the collected pairs.
    total_covered_lines = 0
    total_valid_lines = 0
    for item in lines:
        covered_lines, valid_lines = item
        total_covered_lines += covered_lines
        total_valid_lines += valid_lines
    average_line_coverage = total_covered_lines / total_valid_lines
    # Elided in the listing: the branch totals used below are not defined in the visible code.
    ...
    average_branch_coverage = total_covered_branches / total_valid_branches
    return average_line_coverage, average_branch_coverage
List Comprehension
def get_average_issue_ratio(proj_name, pkgs, issue_key, base_type_name):
    """Return total issues divided by total base count over all packages.

    Declarative version: each total is a ``sum`` over a generator
    expression. All issue counts are fetched before any base count.

    Args:
        proj_name: Project name forwarded to the lookups.
        pkgs: Iterable of package keys.
        issue_key: Rule key whose issues are counted.
        base_type_name: Callable invoked as ``base_type_name(proj_name, pkg)``
            that returns the denominator contribution for one package.

    Raises:
        ZeroDivisionError: if the base counts add up to zero.
    """
    issue_counts = (get_issue_num(proj_name, pkg, issue_key) for pkg in pkgs)
    base_counts = (base_type_name(proj_name, pkg) for pkg in pkgs)
    return sum(issue_counts) / sum(base_counts)
def get_average_coverage_v1(proj_name, pkgs):
    """Return ``(line_cov, branch_cov)`` aggregated over ``pkgs``.

    Each ratio is the sum of the "covered" items divided by the sum of the
    "valid" items of the pairs returned by ``get_test_lines`` and
    ``get_test_branches`` respectively.
    """
    lines_info = [get_test_lines(proj_name, pkg) for pkg in pkgs]
    covered_lines = sum(covered for covered, _ in lines_info)
    valid_lines = sum(valid for _, valid in lines_info)
    line_cov = covered_lines / valid_lines

    branches_info = [get_test_branches(proj_name, pkg) for pkg in pkgs]
    covered_branches = sum(covered for covered, _ in branches_info)
    valid_branches = sum(valid for _, valid in branches_info)
    branch_cov = covered_branches / valid_branches

    return line_cov, branch_cov
You can see that this declarative version is much more readable than its imperative counterpart.
Note 1: Notice the usage of "base_type_name". Because functions are first-class citizens, it is passed into the function like any ordinary parameter.
Note 2: Manipulating a list of pairs in the following way (result = (1+2+3)/(10+20+30)):
# Sample pairs: the result divides the sum of the first items by the sum of the second items.
col = [(1, 10), (2, 20), (3, 30)]
result = sum(first for first, _ in col) / sum(second for _, second in col)
as done in "get_average_coverage_v1", is still too complicated. The following sections therefore show some more concise (and more FP-style) versions.
Dictionary Comprehension
def method_related_check_func(proj_name, pkgs):
    """Return a dict of per-method issue ratios, built with a dict comprehension.

    Keys are the last dot-separated segment of each rule key in
    ``METHOD_RELATED_ISSUE_KEYS``; values are percentage strings with two
    decimal places.
    """
    def short_name(rule_key):
        return rule_key.split('.')[-1]

    def ratio_text(rule_key):
        ratio = get_average_issue_ratio(
            proj_name, pkgs, rule_key, get_method_num)
        return '{:.2%}'.format(ratio)

    return {short_name(rule_key): ratio_text(rule_key)
            for rule_key in METHOD_RELATED_ISSUE_KEYS}
Its imperative counterpart has better code shape, but this functional version is more expressive.
Ref: http://stackoverflow.com/questions/14507591/python-dictionary-comprehension
Use zip, map and apply
def get_average_coverage_v2(proj_name, pkgs):
    """Return total covered lines divided by total valid lines over ``pkgs``.

    Demonstrates variable-argument lambdas: the per-package pairs are
    spread as positional arguments with ``apply``, then ``map`` walks them
    in lockstep and sums each column.
    """
    line_pairs = [get_test_lines(proj_name, pkg) for pkg in pkgs]
    # Receives one item from every pair (one "column") and adds them up.
    sum_column = lambda *y: sum(y)
    # Receives every pair as a separate argument and sums column by column.
    sum_columns = lambda *x: map(sum_column, *x)
    total_covered_lines, total_valid_lines = apply(sum_columns, line_pairs)
    return total_covered_lines / total_valid_lines
def get_cov_ratio(col):
    """Return the sum of the first items of the pairs in ``col`` divided by the sum of the second items."""
    # apply(zip, col) regroups the pairs into (all firsts, all seconds);
    # map(sum, ...) then totals each group.
    totals = map(sum, apply(zip, col))
    covered, valid = totals
    return covered / valid
def get_coverage_data_v3(proj_name, pkgs):
    """Return the line-coverage ratio over ``pkgs`` by delegating to ``get_cov_ratio``."""
    line_pairs = [get_test_lines(proj_name, pkg) for pkg in pkgs]
    return get_cov_ratio(line_pairs)
def get_coverage_data_v4(proj_name, pkgs):
    """Return the line-coverage ratio over ``pkgs`` using ``reduce``.

    The pairs from ``get_test_lines`` are regrouped with ``apply(zip, ...)``,
    each group is totalled with ``map(sum, ...)``, and ``reduce`` with
    ``operator.truediv`` divides the first total by the second.
    """
    # The file's top-level imports do not include "operator", so import it
    # here to avoid a NameError when the lambda runs.
    import operator

    cov_ratio = lambda col: reduce(operator.truediv, map(sum, apply(zip, col)))
    return cov_ratio([get_test_lines(proj_name, pkg) for pkg in pkgs])
v2 works, but it is too complicated and hard to understand. It does, however, show one useful technique: variable arguments ("*args") in a lambda expression.
v3 and v4 are almost the same:
- convert [(1,2), (11,22), (111,222)] to [(1,11,111), (2,22,222)] with "apply(zip, ...)";
- add up the elements of each tuple (giving [1+11+111, 2+22+222]) with "map(sum, ...)";
- divide the first element by the second: using an external function in v3, or "reduce" in v4.
Clearly this FP version is much more readable than its imperative counterpart. Note that v4 depends on the "operator" module, so it needs "import operator".
The corresponding Clojure function is:
;; Divide the sum of the first items of the pairs in col by the sum of their second items.
(defn trans-metrics [col]
(/ (apply + (map first col)) (apply + (map second col))))
or as function literal: #(/ (apply + (map first %)) (apply + (map second %)))
It looks more readable than its Python counterpart.
Final Script
Note: You can call user-defined functions of a Python module directly from the command line:
For example, suppose you write a module named "ex.py" and define a function "getdata" in it.
Now run the following command in its directory:
(the current working directory is automatically added to the module search path, sys.path, when running "python -c")
python -c 'import ex;print ex.getdata("bb")'
The module ex.py and all the functions in it are "stable" business logic, while the command ("print ex.getdata()" in this case) is "how to use this tool", which changes frequently. In this way we separate the frequently changing part from the stable logic, which is a good programming practice.