Pārlūkot izejas kodu

support/scripts/cve.py: switch from NVD to FKIE for the JSON files

NVD will deprecate the v1.1 API which allows us to download the full
database as individual JSON files. Instead, there's a horribly crappy
API that is extremely slow and subject to race conditions.

Fortunately, there is a project, Fraunhofer FKIE - Cyber Analysis and
Defense [1], that goes through the effort of adapting to this new API
and regenerating the convenient JSON files. The JSON files and meta
files are re-generated daily.

Instead of implementing the NVD v2 API, we decided to just use the JSON
files generated by fkie-cad. That saves us the effort of solving the race
conditions, devising a cache mechanism that works, handling the frequent
gateway timeouts on the NVD servers, dealing with the rate limiting, and
keeping up with changes in the API.

Switch to this repository on GitHub as NVD_BASE_URL. The file name is
also slightly different (CVE-20XX.json instead of nvdcve-1.1-20XX.json).

The fkie-cad repository compresses with xz instead of gz. Therefore:
 - rename the filename variables to _xz instead of _gz;
 - use xz as a subprocess because there is no xz decompressor in Python
   stdlib.

[1] https://www.fkie.fraunhofer.de/en/departments/cad.html

Cc: Daniel Lang <dalang@gmx.at>
Signed-off-by: Arnout Vandecappelle <arnout@mind.be>
Signed-off-by: Thomas Petazzoni <thomas.petazzoni@bootlin.com>
Arnout Vandecappelle 1 gadu atpakaļ
vecāks
revīzija
22b6945552
1 mainītis faili ar 14 papildinājumiem un 14 dzēšanām
  1. 14 14
      support/scripts/cve.py

+ 14 - 14
support/scripts/cve.py

@@ -22,7 +22,7 @@ import os
 import requests  # URL checking
 import requests  # URL checking
 import distutils.version
 import distutils.version
 import time
 import time
-import gzip
+import subprocess
 import sys
 import sys
 import operator
 import operator
 
 
@@ -41,8 +41,7 @@ except ImportError:
 sys.path.append('utils/')
 sys.path.append('utils/')
 
 
 NVD_START_YEAR = 2002
 NVD_START_YEAR = 2002
-NVD_JSON_VERSION = "1.1"
-NVD_BASE_URL = "https://nvd.nist.gov/feeds/json/cve/" + NVD_JSON_VERSION
+NVD_BASE_URL = "https://github.com/fkie-cad/nvd-json-data-feeds/releases/latest/download"
 
 
 ops = {
 ops = {
     '>=': operator.ge,
     '>=': operator.ge,
@@ -83,15 +82,15 @@ class CVE:
 
 
     @staticmethod
     @staticmethod
     def download_nvd_year(nvd_path, year):
     def download_nvd_year(nvd_path, year):
-        metaf = "nvdcve-%s-%s.meta" % (NVD_JSON_VERSION, year)
+        metaf = "CVE-%s.meta" % year
         path_metaf = os.path.join(nvd_path, metaf)
         path_metaf = os.path.join(nvd_path, metaf)
-        jsonf_gz = "nvdcve-%s-%s.json.gz" % (NVD_JSON_VERSION, year)
-        path_jsonf_gz = os.path.join(nvd_path, jsonf_gz)
+        jsonf_xz = "CVE-%s.json.xz" % year
+        path_jsonf_xz = os.path.join(nvd_path, jsonf_xz)
 
 
         # If the database file is less than a day old, we assume the NVD data
         # If the database file is less than a day old, we assume the NVD data
         # locally available is recent enough.
         # locally available is recent enough.
-        if os.path.exists(path_jsonf_gz) and os.stat(path_jsonf_gz).st_mtime >= time.time() - 86400:
-            return path_jsonf_gz
+        if os.path.exists(path_jsonf_xz) and os.stat(path_jsonf_xz).st_mtime >= time.time() - 86400:
+            return path_jsonf_xz
 
 
         # If not, we download the meta file
         # If not, we download the meta file
         url = "%s/%s" % (NVD_BASE_URL, metaf)
         url = "%s/%s" % (NVD_BASE_URL, metaf)
@@ -104,19 +103,19 @@ class CVE:
         # we need to re-download the database.
         # we need to re-download the database.
         # If the database does not exist locally, we need to redownload it in
         # If the database does not exist locally, we need to redownload it in
         # any case.
         # any case.
-        if os.path.exists(path_metaf) and os.path.exists(path_jsonf_gz):
+        if os.path.exists(path_metaf) and os.path.exists(path_jsonf_xz):
             meta_known = open(path_metaf, "r").read()
             meta_known = open(path_metaf, "r").read()
             if page_meta.text == meta_known:
             if page_meta.text == meta_known:
-                return path_jsonf_gz
+                return path_jsonf_xz
 
 
         # Grab the compressed JSON NVD, and write files to disk
         # Grab the compressed JSON NVD, and write files to disk
-        url = "%s/%s" % (NVD_BASE_URL, jsonf_gz)
+        url = "%s/%s" % (NVD_BASE_URL, jsonf_xz)
         print("Getting %s" % url)
         print("Getting %s" % url)
         page_json = requests.get(url)
         page_json = requests.get(url)
         page_json.raise_for_status()
         page_json.raise_for_status()
-        open(path_jsonf_gz, "wb").write(page_json.content)
+        open(path_jsonf_xz, "wb").write(page_json.content)
         open(path_metaf, "w").write(page_meta.text)
         open(path_metaf, "w").write(page_meta.text)
-        return path_jsonf_gz
+        return path_jsonf_xz
 
 
     @classmethod
     @classmethod
     def read_nvd_dir(cls, nvd_dir):
     def read_nvd_dir(cls, nvd_dir):
@@ -128,7 +127,8 @@ class CVE:
         for year in range(NVD_START_YEAR, datetime.datetime.now().year + 1):
         for year in range(NVD_START_YEAR, datetime.datetime.now().year + 1):
             filename = CVE.download_nvd_year(nvd_dir, year)
             filename = CVE.download_nvd_year(nvd_dir, year)
             try:
             try:
-                content = ijson.items(gzip.GzipFile(filename), 'CVE_Items.item')
+                uncompressed = subprocess.check_output(["xz", "-d", "-c", filename])
+                content = ijson.items(uncompressed, 'CVE_Items.item')
             except:  # noqa: E722
             except:  # noqa: E722
                 print("ERROR: cannot read %s. Please remove the file then rerun this script" % filename)
                 print("ERROR: cannot read %s. Please remove the file then rerun this script" % filename)
                 raise
                 raise