size-stats 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311
  1. #!/usr/bin/env python3
  2. # Copyright (C) 2014 by Thomas Petazzoni <thomas.petazzoni@free-electrons.com>
  3. # This program is free software; you can redistribute it and/or modify
  4. # it under the terms of the GNU General Public License as published by
  5. # the Free Software Foundation; either version 2 of the License, or
  6. # (at your option) any later version.
  7. #
  8. # This program is distributed in the hope that it will be useful,
  9. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
  11. # General Public License for more details.
  12. #
  13. # You should have received a copy of the GNU General Public License
  14. # along with this program; if not, write to the Free Software
  15. # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  16. import sys
  17. import os
  18. import os.path
  19. import argparse
  20. import csv
  21. import collections
  22. import math
  23. try:
  24. import matplotlib
  25. matplotlib.use('Agg')
  26. import matplotlib.font_manager as fm
  27. import matplotlib.pyplot as plt
  28. except ImportError:
  29. sys.stderr.write("You need python-matplotlib to generate the size graph\n")
  30. exit(1)
  31. class Config:
  32. biggest_first = False
  33. iec = False
  34. size_limit = 0.01
  35. colors = ['#e60004', '#f28e00', '#ffed00', '#940084',
  36. '#2e1d86', '#0068b5', '#009836', '#97c000']
  37. #
  38. # This function adds a new file to 'filesdict', after checking its
  39. # size. The 'filesdict' contain the relative path of the file as the
  40. # key, and as the value a tuple containing the name of the package to
  41. # which the file belongs and the size of the file.
  42. #
  43. # filesdict: the dict to which the file is added
  44. # relpath: relative path of the file
  45. # fullpath: absolute path to the file
  46. # pkg: package to which the file belongs
  47. #
  48. def add_file(filesdict, relpath, abspath, pkg):
  49. if relpath.endswith(".py"):
  50. # also check for compiled .pyc file
  51. add_file(filesdict, relpath + "c", abspath + "c", pkg)
  52. if not os.path.exists(abspath):
  53. return
  54. if os.path.islink(abspath):
  55. return
  56. sz = os.stat(abspath).st_size
  57. filesdict[relpath] = (pkg, sz)
  58. #
  59. # This function returns a dict where each key is the path of a file in
  60. # the root filesystem, and the value is a tuple containing two
  61. # elements: the name of the package to which this file belongs and the
  62. # size of the file.
  63. #
  64. # builddir: path to the Buildroot output directory
  65. #
  66. def build_package_dict(builddir):
  67. filesdict = {}
  68. with open(os.path.join(builddir, "build", "packages-file-list.txt")) as f:
  69. for line in f.readlines():
  70. pkg, fpath = line.split(",", 1)
  71. # remove the initial './' in each file path
  72. fpath = fpath.strip()[2:]
  73. fullpath = os.path.join(builddir, "target", fpath)
  74. add_file(filesdict, fpath, fullpath, pkg)
  75. return filesdict
  76. #
  77. # This function builds a dictionary that contains the name of a
  78. # package as key, and the size of the files installed by this package
  79. # as the value.
  80. #
  81. # filesdict: dictionary with the name of the files as key, and as
  82. # value a tuple containing the name of the package to which the files
  83. # belongs, and the size of the file. As returned by
  84. # build_package_dict.
  85. #
  86. # builddir: path to the Buildroot output directory
  87. #
  88. def build_package_size(filesdict, builddir):
  89. pkgsize = collections.defaultdict(int)
  90. seeninodes = set()
  91. for root, _, files in os.walk(os.path.join(builddir, "target")):
  92. for f in files:
  93. fpath = os.path.join(root, f)
  94. if os.path.islink(fpath):
  95. continue
  96. st = os.stat(fpath)
  97. if st.st_ino in seeninodes:
  98. # hard link
  99. continue
  100. else:
  101. seeninodes.add(st.st_ino)
  102. frelpath = os.path.relpath(fpath, os.path.join(builddir, "target"))
  103. if frelpath not in filesdict:
  104. print("WARNING: %s is not part of any package" % frelpath)
  105. pkg = "unknown"
  106. else:
  107. pkg = filesdict[frelpath][0]
  108. pkgsize[pkg] += st.st_size
  109. return pkgsize
  110. #
  111. # Given a dict returned by build_package_size(), this function
  112. # generates a pie chart of the size installed by each package.
  113. #
  114. # pkgsize: dictionary with the name of the package as a key, and the
  115. # size as the value, as returned by build_package_size.
  116. #
  117. # outputf: output file for the graph
  118. #
  119. def draw_graph(pkgsize, outputf):
  120. def size2string(sz):
  121. if Config.iec:
  122. divider = 1024.0
  123. prefixes = ['', 'Ki', 'Mi', 'Gi', 'Ti']
  124. else:
  125. divider = 1000.0
  126. prefixes = ['', 'k', 'M', 'G', 'T']
  127. while sz > divider and len(prefixes) > 1:
  128. prefixes = prefixes[1:]
  129. sz = sz/divider
  130. # precision is made so that there are always at least three meaningful
  131. # digits displayed (e.g. '3.14' and '10.4', not just '3' and '10')
  132. precision = int(2-math.floor(math.log10(sz))) if sz < 1000 else 0
  133. return '{:.{prec}f} {}B'.format(sz, prefixes[0], prec=precision)
  134. total = sum(pkgsize.values())
  135. labels = []
  136. values = []
  137. other_value = 0
  138. unknown_value = 0
  139. for (p, sz) in sorted(pkgsize.items(), key=lambda x: x[1],
  140. reverse=Config.biggest_first):
  141. if sz < (total * Config.size_limit):
  142. other_value += sz
  143. elif p == "unknown":
  144. unknown_value = sz
  145. else:
  146. labels.append("%s (%s)" % (p, size2string(sz)))
  147. values.append(sz)
  148. if unknown_value != 0:
  149. labels.append("Unknown (%s)" % (size2string(unknown_value)))
  150. values.append(unknown_value)
  151. if other_value != 0:
  152. labels.append("Other (%s)" % (size2string(other_value)))
  153. values.append(other_value)
  154. plt.figure()
  155. patches, texts, autotexts = plt.pie(values, labels=labels,
  156. autopct='%1.1f%%', shadow=True,
  157. colors=Config.colors)
  158. # Reduce text size
  159. proptease = fm.FontProperties()
  160. proptease.set_size('xx-small')
  161. plt.setp(autotexts, fontproperties=proptease)
  162. plt.setp(texts, fontproperties=proptease)
  163. plt.suptitle("Filesystem size per package", fontsize=18, y=.97)
  164. plt.title("Total filesystem size: %s" % (size2string(total)), fontsize=10,
  165. y=.96)
  166. plt.savefig(outputf)
  167. #
  168. # Generate a CSV file with statistics about the size of each file, its
  169. # size contribution to the package and to the overall system.
  170. #
  171. # filesdict: dictionary with the name of the files as key, and as
  172. # value a tuple containing the name of the package to which the files
  173. # belongs, and the size of the file. As returned by
  174. # build_package_dict.
  175. #
  176. # pkgsize: dictionary with the name of the package as a key, and the
  177. # size as the value, as returned by build_package_size.
  178. #
  179. # outputf: output CSV file
  180. #
  181. def gen_files_csv(filesdict, pkgsizes, outputf):
  182. total = 0
  183. for (p, sz) in pkgsizes.items():
  184. total += sz
  185. with open(outputf, 'w') as csvfile:
  186. wr = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
  187. wr.writerow(["File name",
  188. "Package name",
  189. "File size",
  190. "Package size",
  191. "File size in package (%)",
  192. "File size in system (%)"])
  193. for f, (pkgname, filesize) in filesdict.items():
  194. pkgsize = pkgsizes[pkgname]
  195. if pkgsize == 0:
  196. percent_pkg = 0
  197. else:
  198. percent_pkg = float(filesize) / pkgsize * 100
  199. percent_total = float(filesize) / total * 100
  200. wr.writerow([f, pkgname, filesize, pkgsize,
  201. "%.1f" % percent_pkg,
  202. "%.1f" % percent_total])
  203. #
  204. # Generate a CSV file with statistics about the size of each package,
  205. # and their size contribution to the overall system.
  206. #
  207. # pkgsize: dictionary with the name of the package as a key, and the
  208. # size as the value, as returned by build_package_size.
  209. #
  210. # outputf: output CSV file
  211. #
  212. def gen_packages_csv(pkgsizes, outputf):
  213. total = sum(pkgsizes.values())
  214. with open(outputf, 'w') as csvfile:
  215. wr = csv.writer(csvfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
  216. wr.writerow(["Package name", "Package size",
  217. "Package size in system (%)"])
  218. for (pkg, size) in pkgsizes.items():
  219. wr.writerow([pkg, size, "%.1f" % (float(size) / total * 100)])
  220. #
  221. # Our special action for --iec, --binary, --si, --decimal
  222. #
  223. class PrefixAction(argparse.Action):
  224. def __init__(self, option_strings, dest, **kwargs):
  225. for key in ["type", "nargs"]:
  226. if key in kwargs:
  227. raise ValueError('"{}" not allowed'.format(key))
  228. super(PrefixAction, self).__init__(option_strings, dest, nargs=0,
  229. type=bool, **kwargs)
  230. def __call__(self, parser, namespace, values, option_string=None):
  231. setattr(namespace, self.dest, option_string in ["--iec", "--binary"])
  232. def main():
  233. parser = argparse.ArgumentParser(description='Draw size statistics graphs')
  234. parser.add_argument("--builddir", '-i', metavar="BUILDDIR", required=True,
  235. help="Buildroot output directory")
  236. parser.add_argument("--graph", '-g', metavar="GRAPH",
  237. help="Graph output file (.pdf or .png extension)")
  238. parser.add_argument("--file-size-csv", '-f', metavar="FILE_SIZE_CSV",
  239. help="CSV output file with file size statistics")
  240. parser.add_argument("--package-size-csv", '-p', metavar="PKG_SIZE_CSV",
  241. help="CSV output file with package size statistics")
  242. parser.add_argument("--biggest-first", action='store_true',
  243. help="Sort packages in decreasing size order, " +
  244. "rather than in increasing size order")
  245. parser.add_argument("--iec", "--binary", "--si", "--decimal",
  246. action=PrefixAction,
  247. help="Use IEC (binary, powers of 1024) or SI (decimal, "
  248. "powers of 1000, the default) prefixes")
  249. parser.add_argument("--size-limit", "-l", type=float,
  250. help='Under this size ratio, files are accounted to ' +
  251. 'the generic "Other" package. Default: 0.01 (1%%)')
  252. args = parser.parse_args()
  253. Config.biggest_first = args.biggest_first
  254. Config.iec = args.iec
  255. if args.size_limit is not None:
  256. if args.size_limit < 0.0 or args.size_limit > 1.0:
  257. parser.error("--size-limit must be in [0.0..1.0]")
  258. Config.size_limit = args.size_limit
  259. # Find out which package installed what files
  260. pkgdict = build_package_dict(args.builddir)
  261. # Collect the size installed by each package
  262. pkgsize = build_package_size(pkgdict, args.builddir)
  263. if args.graph:
  264. draw_graph(pkgsize, args.graph)
  265. if args.file_size_csv:
  266. gen_files_csv(pkgdict, pkgsize, args.file_size_csv)
  267. if args.package_size_csv:
  268. gen_packages_csv(pkgsize, args.package_size_csv)
  269. if __name__ == "__main__":
  270. main()