Browse Source

support/scripts/cve.py: switch from NVD to FKIE for the JSON files

NVD will deprecate the v1.1 API which allows us to download the full
database as individual JSON files. Instead, there's a horribly crappy
API that is extremely slow and subject to race conditions.

Fortunately, there is a project, Fraunhofer FKIE - Cyber Analysis and
Defense [1], that goes through the effort of adapting to this new API
and regenerating the convenient JSON files. The JSON files and meta
files are re-generated daily.

Instead of implementing the NVD v2 API, we decided to just use the JSON
files generated by fkie-cad. That saves us the effort of solving the race
conditions, devising a cache mechanism that works, handling the frequent
gateway timeouts on the NVD servers, dealing with the rate limiting, and
keeping up with changes in the API.

Switch to this repository on github as NVD_BASE_URL. The file name is
also slightly different (CVE-20XX.json instead of nvdcve-1.1-20XX.json).

The fkie-cad repository compresses with xz instead of gz. Therefore:
 - rename the filename variables to _xz instead of _gz;
 - use xz as a subprocess because there is no xz decompressor in Python
   stdlib.

[1] https://www.fkie.fraunhofer.de/en/departments/cad.html

Cc: Daniel Lang <dalang@gmx.at>
Signed-off-by: Arnout Vandecappelle <arnout@mind.be>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Arnout Vandecappelle 1 year ago
parent
commit
22b6945552
1 changed files with 14 additions and 14 deletions
  1. 14 14
      support/scripts/cve.py

+ 14 - 14
support/scripts/cve.py

@@ -22,7 +22,7 @@ import os
 import requests  # URL checking
 import distutils.version
 import time
-import gzip
+import subprocess
 import sys
 import operator
 
@@ -41,8 +41,7 @@ except ImportError:
 sys.path.append('utils/')
 
 NVD_START_YEAR = 2002
-NVD_JSON_VERSION = "1.1"
-NVD_BASE_URL = "https://nvd.nist.gov/feeds/json/cve/" + NVD_JSON_VERSION
+NVD_BASE_URL = "https://github.com/fkie-cad/nvd-json-data-feeds/releases/latest/download"
 
 ops = {
     '>=': operator.ge,
@@ -83,15 +82,15 @@ class CVE:
 
     @staticmethod
     def download_nvd_year(nvd_path, year):
-        metaf = "nvdcve-%s-%s.meta" % (NVD_JSON_VERSION, year)
+        metaf = "CVE-%s.meta" % year
         path_metaf = os.path.join(nvd_path, metaf)
-        jsonf_gz = "nvdcve-%s-%s.json.gz" % (NVD_JSON_VERSION, year)
-        path_jsonf_gz = os.path.join(nvd_path, jsonf_gz)
+        jsonf_xz = "CVE-%s.json.xz" % year
+        path_jsonf_xz = os.path.join(nvd_path, jsonf_xz)
 
         # If the database file is less than a day old, we assume the NVD data
         # locally available is recent enough.
-        if os.path.exists(path_jsonf_gz) and os.stat(path_jsonf_gz).st_mtime >= time.time() - 86400:
-            return path_jsonf_gz
+        if os.path.exists(path_jsonf_xz) and os.stat(path_jsonf_xz).st_mtime >= time.time() - 86400:
+            return path_jsonf_xz
 
         # If not, we download the meta file
         url = "%s/%s" % (NVD_BASE_URL, metaf)
@@ -104,19 +103,19 @@ class CVE:
         # we need to re-download the database.
         # If the database does not exist locally, we need to redownload it in
         # any case.
-        if os.path.exists(path_metaf) and os.path.exists(path_jsonf_gz):
+        if os.path.exists(path_metaf) and os.path.exists(path_jsonf_xz):
             meta_known = open(path_metaf, "r").read()
             if page_meta.text == meta_known:
-                return path_jsonf_gz
+                return path_jsonf_xz
 
         # Grab the compressed JSON NVD, and write files to disk
-        url = "%s/%s" % (NVD_BASE_URL, jsonf_gz)
+        url = "%s/%s" % (NVD_BASE_URL, jsonf_xz)
         print("Getting %s" % url)
         page_json = requests.get(url)
         page_json.raise_for_status()
-        open(path_jsonf_gz, "wb").write(page_json.content)
+        open(path_jsonf_xz, "wb").write(page_json.content)
         open(path_metaf, "w").write(page_meta.text)
-        return path_jsonf_gz
+        return path_jsonf_xz
 
     @classmethod
     def read_nvd_dir(cls, nvd_dir):
@@ -128,7 +127,8 @@ class CVE:
         for year in range(NVD_START_YEAR, datetime.datetime.now().year + 1):
             filename = CVE.download_nvd_year(nvd_dir, year)
             try:
-                content = ijson.items(gzip.GzipFile(filename), 'CVE_Items.item')
+                uncompressed = subprocess.check_output(["xz", "-d", "-c", filename])
+                content = ijson.items(uncompressed, 'CVE_Items.item')
             except:  # noqa: E722
                 print("ERROR: cannot read %s. Please remove the file then rerun this script" % filename)
                 raise