pkg-stats 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596
  1. #!/usr/bin/env python
  2. # Copyright (C) 2009 by Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
  3. #
  4. # This program is free software; you can redistribute it and/or modify
  5. # it under the terms of the GNU General Public License as published by
  6. # the Free Software Foundation; either version 2 of the License, or
  7. # (at your option) any later version.
  8. #
  9. # This program is distributed in the hope that it will be useful,
  10. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  12. # General Public License for more details.
  13. #
  14. # You should have received a copy of the GNU General Public License
  15. # along with this program; if not, write to the Free Software
  16. # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  17. import argparse
  18. import datetime
  19. import fnmatch
  20. import os
  21. from collections import defaultdict
  22. import re
  23. import subprocess
  24. import sys
  25. import requests # URL checking
  26. from multiprocessing import Pool
  27. INFRA_RE = re.compile("\$\(eval \$\(([a-z-]*)-package\)\)")
  28. URL_RE = re.compile("\s*https?://\S*\s*$")
  29. class Package:
  30. all_licenses = list()
  31. all_license_files = list()
  32. all_versions = dict()
  33. def __init__(self, name, path):
  34. self.name = name
  35. self.path = path
  36. self.infras = None
  37. self.has_license = False
  38. self.has_license_files = False
  39. self.has_hash = False
  40. self.patch_count = 0
  41. self.warnings = 0
  42. self.current_version = None
  43. self.url = None
  44. self.url_status = None
  45. self.url_worker = None
  46. def pkgvar(self):
  47. return self.name.upper().replace("-", "_")
  48. def set_url(self):
  49. """
  50. Fills in the .url field
  51. """
  52. self.url_status = "No Config.in"
  53. for filename in os.listdir(os.path.dirname(self.path)):
  54. if fnmatch.fnmatch(filename, 'Config.*'):
  55. fp = open(os.path.join(os.path.dirname(self.path), filename), "r")
  56. for config_line in fp:
  57. if URL_RE.match(config_line):
  58. self.url = config_line.strip()
  59. self.url_status = "Found"
  60. fp.close()
  61. return
  62. self.url_status = "Missing"
  63. fp.close()
  64. def set_infra(self):
  65. """
  66. Fills in the .infras field
  67. """
  68. self.infras = list()
  69. with open(self.path, 'r') as f:
  70. lines = f.readlines()
  71. for l in lines:
  72. match = INFRA_RE.match(l)
  73. if not match:
  74. continue
  75. infra = match.group(1)
  76. if infra.startswith("host-"):
  77. self.infras.append(("host", infra[5:]))
  78. else:
  79. self.infras.append(("target", infra))
  80. def set_license(self):
  81. """
  82. Fills in the .has_license and .has_license_files fields
  83. """
  84. var = self.pkgvar()
  85. if var in self.all_licenses:
  86. self.has_license = True
  87. if var in self.all_license_files:
  88. self.has_license_files = True
  89. def set_hash_info(self):
  90. """
  91. Fills in the .has_hash field
  92. """
  93. hashpath = self.path.replace(".mk", ".hash")
  94. self.has_hash = os.path.exists(hashpath)
  95. def set_patch_count(self):
  96. """
  97. Fills in the .patch_count field
  98. """
  99. self.patch_count = 0
  100. pkgdir = os.path.dirname(self.path)
  101. for subdir, _, _ in os.walk(pkgdir):
  102. self.patch_count += len(fnmatch.filter(os.listdir(subdir), '*.patch'))
  103. def set_current_version(self):
  104. """
  105. Fills in the .current_version field
  106. """
  107. var = self.pkgvar()
  108. if var in self.all_versions:
  109. self.current_version = self.all_versions[var]
  110. def set_check_package_warnings(self):
  111. """
  112. Fills in the .warnings field
  113. """
  114. cmd = ["./utils/check-package"]
  115. pkgdir = os.path.dirname(self.path)
  116. for root, dirs, files in os.walk(pkgdir):
  117. for f in files:
  118. if f.endswith(".mk") or f.endswith(".hash") or f == "Config.in" or f == "Config.in.host":
  119. cmd.append(os.path.join(root, f))
  120. o = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()[1]
  121. lines = o.splitlines()
  122. for line in lines:
  123. m = re.match("^([0-9]*) warnings generated", line)
  124. if m:
  125. self.warnings = int(m.group(1))
  126. return
  127. def __eq__(self, other):
  128. return self.path == other.path
  129. def __lt__(self, other):
  130. return self.path < other.path
  131. def __str__(self):
  132. return "%s (path='%s', license='%s', license_files='%s', hash='%s', patches=%d)" % \
  133. (self.name, self.path, self.has_license, self.has_license_files, self.has_hash, self.patch_count)
  134. def get_pkglist(npackages, package_list):
  135. """
  136. Builds the list of Buildroot packages, returning a list of Package
  137. objects. Only the .name and .path fields of the Package object are
  138. initialized.
  139. npackages: limit to N packages
  140. package_list: limit to those packages in this list
  141. """
  142. WALK_USEFUL_SUBDIRS = ["boot", "linux", "package", "toolchain"]
  143. WALK_EXCLUDES = ["boot/common.mk",
  144. "linux/linux-ext-.*.mk",
  145. "package/freescale-imx/freescale-imx.mk",
  146. "package/gcc/gcc.mk",
  147. "package/gstreamer/gstreamer.mk",
  148. "package/gstreamer1/gstreamer1.mk",
  149. "package/gtk2-themes/gtk2-themes.mk",
  150. "package/matchbox/matchbox.mk",
  151. "package/opengl/opengl.mk",
  152. "package/qt5/qt5.mk",
  153. "package/x11r7/x11r7.mk",
  154. "package/doc-asciidoc.mk",
  155. "package/pkg-.*.mk",
  156. "package/nvidia-tegra23/nvidia-tegra23.mk",
  157. "toolchain/toolchain-external/pkg-toolchain-external.mk",
  158. "toolchain/toolchain-external/toolchain-external.mk",
  159. "toolchain/toolchain.mk",
  160. "toolchain/helpers.mk",
  161. "toolchain/toolchain-wrapper.mk"]
  162. packages = list()
  163. count = 0
  164. for root, dirs, files in os.walk("."):
  165. rootdir = root.split("/")
  166. if len(rootdir) < 2:
  167. continue
  168. if rootdir[1] not in WALK_USEFUL_SUBDIRS:
  169. continue
  170. for f in files:
  171. if not f.endswith(".mk"):
  172. continue
  173. # Strip ending ".mk"
  174. pkgname = f[:-3]
  175. if package_list and pkgname not in package_list:
  176. continue
  177. pkgpath = os.path.join(root, f)
  178. skip = False
  179. for exclude in WALK_EXCLUDES:
  180. # pkgpath[2:] strips the initial './'
  181. if re.match(exclude, pkgpath[2:]):
  182. skip = True
  183. continue
  184. if skip:
  185. continue
  186. p = Package(pkgname, pkgpath)
  187. packages.append(p)
  188. count += 1
  189. if npackages and count == npackages:
  190. return packages
  191. return packages
  192. def package_init_make_info():
  193. # Licenses
  194. o = subprocess.check_output(["make", "BR2_HAVE_DOT_CONFIG=y",
  195. "-s", "printvars", "VARS=%_LICENSE"])
  196. for l in o.splitlines():
  197. # Get variable name and value
  198. pkgvar, value = l.split("=")
  199. # If present, strip HOST_ from variable name
  200. if pkgvar.startswith("HOST_"):
  201. pkgvar = pkgvar[5:]
  202. # Strip _LICENSE
  203. pkgvar = pkgvar[:-8]
  204. # If value is "unknown", no license details available
  205. if value == "unknown":
  206. continue
  207. Package.all_licenses.append(pkgvar)
  208. # License files
  209. o = subprocess.check_output(["make", "BR2_HAVE_DOT_CONFIG=y",
  210. "-s", "printvars", "VARS=%_LICENSE_FILES"])
  211. for l in o.splitlines():
  212. # Get variable name and value
  213. pkgvar, value = l.split("=")
  214. # If present, strip HOST_ from variable name
  215. if pkgvar.startswith("HOST_"):
  216. pkgvar = pkgvar[5:]
  217. if pkgvar.endswith("_MANIFEST_LICENSE_FILES"):
  218. continue
  219. # Strip _LICENSE_FILES
  220. pkgvar = pkgvar[:-14]
  221. Package.all_license_files.append(pkgvar)
  222. # Version
  223. o = subprocess.check_output(["make", "BR2_HAVE_DOT_CONFIG=y",
  224. "-s", "printvars", "VARS=%_VERSION"])
  225. # We process first the host package VERSION, and then the target
  226. # package VERSION. This means that if a package exists in both
  227. # target and host variants, with different version numbers
  228. # (unlikely), we'll report the target version number.
  229. version_list = o.splitlines()
  230. version_list = [x for x in version_list if x.startswith("HOST_")] + \
  231. [x for x in version_list if not x.startswith("HOST_")]
  232. for l in version_list:
  233. # Get variable name and value
  234. pkgvar, value = l.split("=")
  235. # If present, strip HOST_ from variable name
  236. if pkgvar.startswith("HOST_"):
  237. pkgvar = pkgvar[5:]
  238. if pkgvar.endswith("_DL_VERSION"):
  239. continue
  240. # Strip _VERSION
  241. pkgvar = pkgvar[:-8]
  242. Package.all_versions[pkgvar] = value
  243. def check_url_status_worker(url, url_status):
  244. if url_status != "Missing" and url_status != "No Config.in":
  245. try:
  246. url_status_code = requests.head(url, timeout=30).status_code
  247. if url_status_code >= 400:
  248. return "Invalid(%s)" % str(url_status_code)
  249. except requests.exceptions.RequestException:
  250. return "Invalid(Err)"
  251. return "Ok"
  252. return url_status
  253. def check_package_urls(packages):
  254. Package.pool = Pool(processes=64)
  255. for pkg in packages:
  256. pkg.url_worker = pkg.pool.apply_async(check_url_status_worker, (pkg.url, pkg.url_status))
  257. for pkg in packages:
  258. pkg.url_status = pkg.url_worker.get(timeout=3600)
  259. def calculate_stats(packages):
  260. stats = defaultdict(int)
  261. for pkg in packages:
  262. # If packages have multiple infra, take the first one. For the
  263. # vast majority of packages, the target and host infra are the
  264. # same. There are very few packages that use a different infra
  265. # for the host and target variants.
  266. if len(pkg.infras) > 0:
  267. infra = pkg.infras[0][1]
  268. stats["infra-%s" % infra] += 1
  269. else:
  270. stats["infra-unknown"] += 1
  271. if pkg.has_license:
  272. stats["license"] += 1
  273. else:
  274. stats["no-license"] += 1
  275. if pkg.has_license_files:
  276. stats["license-files"] += 1
  277. else:
  278. stats["no-license-files"] += 1
  279. if pkg.has_hash:
  280. stats["hash"] += 1
  281. else:
  282. stats["no-hash"] += 1
  283. stats["patches"] += pkg.patch_count
  284. return stats
  285. html_header = """
  286. <head>
  287. <script src=\"https://www.kryogenix.org/code/browser/sorttable/sorttable.js\"></script>
  288. <style type=\"text/css\">
  289. table {
  290. width: 100%;
  291. }
  292. td {
  293. border: 1px solid black;
  294. }
  295. td.centered {
  296. text-align: center;
  297. }
  298. td.wrong {
  299. background: #ff9a69;
  300. }
  301. td.correct {
  302. background: #d2ffc4;
  303. }
  304. td.nopatches {
  305. background: #d2ffc4;
  306. }
  307. td.somepatches {
  308. background: #ffd870;
  309. }
  310. td.lotsofpatches {
  311. background: #ff9a69;
  312. }
  313. td.good_url {
  314. background: #d2ffc4;
  315. }
  316. td.missing_url {
  317. background: #ffd870;
  318. }
  319. td.invalid_url {
  320. background: #ff9a69;
  321. }
  322. </style>
  323. <title>Statistics of Buildroot packages</title>
  324. </head>
  325. <a href=\"#results\">Results</a><br/>
  326. <p id=\"sortable_hint\"></p>
  327. """
  328. html_footer = """
  329. </body>
  330. <script>
  331. if (typeof sorttable === \"object\") {
  332. document.getElementById(\"sortable_hint\").innerHTML =
  333. \"hint: the table can be sorted by clicking the column headers\"
  334. }
  335. </script>
  336. </html>
  337. """
  338. def infra_str(infra_list):
  339. if not infra_list:
  340. return "Unknown"
  341. elif len(infra_list) == 1:
  342. return "<b>%s</b><br/>%s" % (infra_list[0][1], infra_list[0][0])
  343. elif infra_list[0][1] == infra_list[1][1]:
  344. return "<b>%s</b><br/>%s + %s" % \
  345. (infra_list[0][1], infra_list[0][0], infra_list[1][0])
  346. else:
  347. return "<b>%s</b> (%s)<br/><b>%s</b> (%s)" % \
  348. (infra_list[0][1], infra_list[0][0],
  349. infra_list[1][1], infra_list[1][0])
  350. def boolean_str(b):
  351. if b:
  352. return "Yes"
  353. else:
  354. return "No"
  355. def dump_html_pkg(f, pkg):
  356. f.write(" <tr>\n")
  357. f.write(" <td>%s</td>\n" % pkg.path[2:])
  358. # Patch count
  359. td_class = ["centered"]
  360. if pkg.patch_count == 0:
  361. td_class.append("nopatches")
  362. elif pkg.patch_count < 5:
  363. td_class.append("somepatches")
  364. else:
  365. td_class.append("lotsofpatches")
  366. f.write(" <td class=\"%s\">%s</td>\n" %
  367. (" ".join(td_class), str(pkg.patch_count)))
  368. # Infrastructure
  369. infra = infra_str(pkg.infras)
  370. td_class = ["centered"]
  371. if infra == "Unknown":
  372. td_class.append("wrong")
  373. else:
  374. td_class.append("correct")
  375. f.write(" <td class=\"%s\">%s</td>\n" %
  376. (" ".join(td_class), infra_str(pkg.infras)))
  377. # License
  378. td_class = ["centered"]
  379. if pkg.has_license:
  380. td_class.append("correct")
  381. else:
  382. td_class.append("wrong")
  383. f.write(" <td class=\"%s\">%s</td>\n" %
  384. (" ".join(td_class), boolean_str(pkg.has_license)))
  385. # License files
  386. td_class = ["centered"]
  387. if pkg.has_license_files:
  388. td_class.append("correct")
  389. else:
  390. td_class.append("wrong")
  391. f.write(" <td class=\"%s\">%s</td>\n" %
  392. (" ".join(td_class), boolean_str(pkg.has_license_files)))
  393. # Hash
  394. td_class = ["centered"]
  395. if pkg.has_hash:
  396. td_class.append("correct")
  397. else:
  398. td_class.append("wrong")
  399. f.write(" <td class=\"%s\">%s</td>\n" %
  400. (" ".join(td_class), boolean_str(pkg.has_hash)))
  401. # Current version
  402. if len(pkg.current_version) > 20:
  403. current_version = pkg.current_version[:20] + "..."
  404. else:
  405. current_version = pkg.current_version
  406. f.write(" <td class=\"centered\">%s</td>\n" % current_version)
  407. # Warnings
  408. td_class = ["centered"]
  409. if pkg.warnings == 0:
  410. td_class.append("correct")
  411. else:
  412. td_class.append("wrong")
  413. f.write(" <td class=\"%s\">%d</td>\n" %
  414. (" ".join(td_class), pkg.warnings))
  415. # URL status
  416. td_class = ["centered"]
  417. url_str = pkg.url_status
  418. if pkg.url_status == "Missing" or pkg.url_status == "No Config.in":
  419. td_class.append("missing_url")
  420. elif pkg.url_status.startswith("Invalid"):
  421. td_class.append("invalid_url")
  422. url_str = "<a href=%s>%s</a>" % (pkg.url, pkg.url_status)
  423. else:
  424. td_class.append("good_url")
  425. url_str = "<a href=%s>Link</a>" % pkg.url
  426. f.write(" <td class=\"%s\">%s</td>\n" %
  427. (" ".join(td_class), url_str))
  428. f.write(" </tr>\n")
  429. def dump_html_all_pkgs(f, packages):
  430. f.write("""
  431. <table class=\"sortable\">
  432. <tr>
  433. <td>Package</td>
  434. <td class=\"centered\">Patch count</td>
  435. <td class=\"centered\">Infrastructure</td>
  436. <td class=\"centered\">License</td>
  437. <td class=\"centered\">License files</td>
  438. <td class=\"centered\">Hash file</td>
  439. <td class=\"centered\">Current version</td>
  440. <td class=\"centered\">Warnings</td>
  441. <td class=\"centered\">Upstream URL</td>
  442. </tr>
  443. """)
  444. for pkg in sorted(packages):
  445. dump_html_pkg(f, pkg)
  446. f.write("</table>")
  447. def dump_html_stats(f, stats):
  448. f.write("<a id=\"results\"></a>\n")
  449. f.write("<table>\n")
  450. infras = [infra[6:] for infra in stats.keys() if infra.startswith("infra-")]
  451. for infra in infras:
  452. f.write(" <tr><td>Packages using the <i>%s</i> infrastructure</td><td>%s</td></tr>\n" %
  453. (infra, stats["infra-%s" % infra]))
  454. f.write(" <tr><td>Packages having license information</td><td>%s</td></tr>\n" %
  455. stats["license"])
  456. f.write(" <tr><td>Packages not having license information</td><td>%s</td></tr>\n" %
  457. stats["no-license"])
  458. f.write(" <tr><td>Packages having license files information</td><td>%s</td></tr>\n" %
  459. stats["license-files"])
  460. f.write(" <tr><td>Packages not having license files information</td><td>%s</td></tr>\n" %
  461. stats["no-license-files"])
  462. f.write(" <tr><td>Packages having a hash file</td><td>%s</td></tr>\n" %
  463. stats["hash"])
  464. f.write(" <tr><td>Packages not having a hash file</td><td>%s</td></tr>\n" %
  465. stats["no-hash"])
  466. f.write(" <tr><td>Total number of patches</td><td>%s</td></tr>\n" %
  467. stats["patches"])
  468. f.write("</table>\n")
  469. def dump_gen_info(f):
  470. # Updated on Mon Feb 19 08:12:08 CET 2018, Git commit aa77030b8f5e41f1c53eb1c1ad664b8c814ba032
  471. o = subprocess.check_output(["git", "log", "master", "-n", "1", "--pretty=format:%H"])
  472. git_commit = o.splitlines()[0]
  473. f.write("<p><i>Updated on %s, git commit %s</i></p>\n" %
  474. (str(datetime.datetime.utcnow()), git_commit))
  475. def dump_html(packages, stats, output):
  476. with open(output, 'w') as f:
  477. f.write(html_header)
  478. dump_html_all_pkgs(f, packages)
  479. dump_html_stats(f, stats)
  480. dump_gen_info(f)
  481. f.write(html_footer)
  482. def parse_args():
  483. parser = argparse.ArgumentParser()
  484. parser.add_argument('-o', dest='output', action='store', required=True,
  485. help='HTML output file')
  486. parser.add_argument('-n', dest='npackages', type=int, action='store',
  487. help='Number of packages')
  488. parser.add_argument('-p', dest='packages', action='store',
  489. help='List of packages (comma separated)')
  490. return parser.parse_args()
  491. def __main__():
  492. args = parse_args()
  493. if args.npackages and args.packages:
  494. print("ERROR: -n and -p are mutually exclusive")
  495. sys.exit(1)
  496. if args.packages:
  497. package_list = args.packages.split(",")
  498. else:
  499. package_list = None
  500. print("Build package list ...")
  501. packages = get_pkglist(args.npackages, package_list)
  502. print("Getting package make info ...")
  503. package_init_make_info()
  504. print("Getting package details ...")
  505. for pkg in packages:
  506. pkg.set_infra()
  507. pkg.set_license()
  508. pkg.set_hash_info()
  509. pkg.set_patch_count()
  510. pkg.set_check_package_warnings()
  511. pkg.set_current_version()
  512. pkg.set_url()
  513. print("Checking URL status")
  514. check_package_urls(packages)
  515. print("Calculate stats")
  516. stats = calculate_stats(packages)
  517. print("Write HTML")
  518. dump_html(packages, stats, args.output)
  519. __main__()