dssg · jesteria · Mar 6, 2019 · Mar 5, 2019 · jesteria · Mar 6, 2019
diff --git a/src/tests/catwalk_tests/test_storage.py b/src/tests/catwalk_tests/test_storage.py
@@ -90,12 +90,12 @@ def matrix_stores():
 
     with tempfile.TemporaryDirectory() as tmpdir:
         project_storage = ProjectStorage(tmpdir)
-        tmpcsv = os.path.join(tmpdir, "df.csv")
+        tmpcsv = os.path.join(tmpdir, "df.csv.gz")
         tmpyaml = os.path.join(tmpdir, "df.yaml")
         tmphdf = os.path.join(tmpdir, "df.h5")
         with open(tmpyaml, "w") as outfile:
             yaml.dump(METADATA, outfile, default_flow_style=False)
-        df.to_csv(tmpcsv)
+        df.to_csv(tmpcsv, compression="gzip")
         df.to_hdf(tmphdf, "matrix")
         csv = CSVMatrixStore(project_storage, [], "df")
         hdf = HDFMatrixStore(project_storage, [], "df")

diff --git a/src/tests/test_partial_experiments.py b/src/tests/test_partial_experiments.py
@@ -181,7 +181,7 @@ def test_run(self):
             matrices = experiment.matrix_build_tasks
             assert len(matrices) > 0
             for matrix in matrices:
-                assert "{}.csv".format(matrix) in matrices_and_metadata
+                assert "{}.csv.gz".format(matrix) in matrices_and_metadata
                 assert "{}.yaml".format(matrix) in matrices_and_metadata
 
     def test_validate_nonstrict(self):

diff --git a/src/triage/component/catwalk/storage.py b/src/triage/component/catwalk/storage.py
@@ -19,6 +19,7 @@
 import s3fs
 import yaml
 from boto3.s3.transfer import TransferConfig
+import gzip
 
 
 class Store(object):
@@ -610,15 +611,15 @@ def save(self):
 
 
 class CSVMatrixStore(MatrixStore):
-    """Store and access matrices using CSV"""
+    """Store and access matrices as gzip-compressed CSV"""
 
-    suffix = "csv"
+    suffix = "csv.gz"
 
     @property
     def head_of_matrix(self):
         try:
             with self.matrix_base_store.open("rb") as fd:
-                head_of_matrix = pd.read_csv(fd, nrows=1)
+                head_of_matrix = pd.read_csv(fd, compression="gzip", nrows=1)
                 head_of_matrix.set_index(self.metadata["indices"], inplace=True)
         except FileNotFoundError as fnfe:
             logging.exception(f"Matrix isn't there: {fnfe}")
@@ -632,10 +633,10 @@ def _load(self):
             ["as_of_date"] if "as_of_date" in self.metadata["indices"] else False
         )
         with self.matrix_base_store.open("rb") as fd:
-            return pd.read_csv(fd, parse_dates=parse_dates_argument)
+            return pd.read_csv(fd, compression="gzip", parse_dates=parse_dates_argument)
 
     def save(self):
-        self.matrix_base_store.write(self.full_matrix_for_saving.to_csv(None).encode("utf-8"))
+        self.matrix_base_store.write(gzip.compress(self.full_matrix_for_saving.to_csv(None).encode("utf-8")))
         with self.metadata_base_store.open("wb") as fd:
             yaml.dump(self.metadata, fd, encoding="utf-8")