diff --git a/Cargo.lock b/Cargo.lock index 94e2e635..ed14c3fc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10,9 +10,9 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" [[package]] name = "ahash" -version = "0.7.7" +version = "0.7.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a824f2aa7e75a0c98c5a504fceb80649e9c35265d44525b5f94de4771a395cd" +checksum = "891477e0c6a8957309ee5c45a6368af3ae14bb510732d2684ffa19af310920f9" dependencies = [ "getrandom", "once_cell", @@ -51,9 +51,9 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.4" +version = "1.0.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7079075b41f533b8c61d2a4d073c4676e1f8b249ff94a393b0595db304e0dd87" +checksum = "8901269c6307e8d93993578286ac0edf7f195079ffff5ebdeea6a59ffb7e36bc" [[package]] name = "anyhow" @@ -61,6 +61,15 @@ version = "1.0.80" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5ad32ce52e4161730f7098c077cd2ed6229b5804ccf99e5366be1ab72a98b4e1" +[[package]] +name = "approx" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" +dependencies = [ + "num-traits", +] + [[package]] name = "assert_cmd" version = "2.0.14" @@ -128,7 +137,7 @@ dependencies = [ "regex", "rustc-hash", "shlex", - "syn 2.0.48", + "syn 2.0.50", ] [[package]] @@ -178,15 +187,15 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.14.0" +version = "3.15.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f30e7476521f6f8af1a1c4c0b8cc94f0bee37d91763d0ca2665f299b6cd8aec" +checksum = "a3b1be7772ee4501dba05acbe66bb1e8760f6a6c474a36035631638e4415f130" [[package]] name = "bytecheck" -version = "0.6.11" +version = "0.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b6372023ac861f6e6dc89c8344a8f398fb42aaba2b5dbc649ca0c0e9dbcb627" +checksum = "23cdc57ce23ac53c931e88a43d06d070a6fd142f2617be5855eb75efc9beb1c2" dependencies = [ "bytecheck_derive", "ptr_meta", @@ -195,9 +204,9 @@ dependencies = [ [[package]] name = "bytecheck_derive" -version = "0.6.11" +version = "0.6.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7ec4c6f261935ad534c0c22dbef2201b45918860eb1c574b972bd213a76af61" +checksum = "3db406d29fbcd95542e92559bed4d8ad92636d1ca8b3b72ede10b4bcc010e659" dependencies = [ "proc-macro2", "quote", @@ -212,9 +221,9 @@ checksum = "e1e5f035d16fc623ae5f74981db80a439803888314e3a555fd6f04acd51a3205" [[package]] name = "bytemuck" -version = "1.14.0" +version = "1.14.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "374d28ec25809ee0e23827c2ab573d729e293f281dfe393500e7ad618baa61c6" +checksum = "a2ef034f05691a48569bd920a96c81b9d91bbad1ab5ac7c4616c1f6ef36cb79f" [[package]] name = "byteorder" @@ -260,11 +269,10 @@ dependencies = [ [[package]] name = "cc" -version = "1.0.83" +version = "1.0.86" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1174fb0b6ec23863f8b971027804a42614e347eafb0a95bf0b12cdae21fc4d0" +checksum = "7f9fa1897e4325be0d68d48df6aa1a71ac2ed4d27723887e7754192705350730" dependencies = [ - "jobserver", "libc", ] @@ -285,9 +293,9 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" [[package]] name = "chrono" -version = "0.4.33" +version = "0.4.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f13690e35a5e4ace198e7beea2895d29f3a9cc55015fcebe6336bd2010af9eb" +checksum = "5bc015644b92d5890fab7489e49d21f879d5c990186827d42ec511919404f38b" dependencies = [ "android-tzdata", "iana-time-zone", @@ -334,9 +342,9 @@ dependencies = [ [[package]] name = "crc32fast" -version = "1.3.2" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d" +checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa" dependencies = [ "cfg-if", ] @@ -401,9 +409,9 @@ checksum = "fea41bba32d969b513997752735605054bc0dfa92b4c56bf1189f2e174be7a10" [[package]] name = "either" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07" +checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a" [[package]] name = "enum_dispatch" @@ -414,7 +422,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.50", ] [[package]] @@ -531,9 +539,9 @@ checksum = "bd5256b483761cd23699d0da46cc6fd2ee3be420bbe6d020ae4a091e70b7e9fd" [[package]] name = "histogram" -version = "0.9.0" +version = "0.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e5ee9487899388cf1a1155759c39e3c156c5d198b6da1734053954a6e40e6d4d" +checksum = "4b634390eb8a63662e127836d4e2f26d7ae930600d4e05ee0fd85a009eeb1175" dependencies = [ "thiserror", ] @@ -546,9 +554,9 @@ checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" [[package]] name = "iana-time-zone" -version = "0.1.59" +version = "0.1.60" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b6a67363e2aa4443928ce15e57ebae94fd8949958fd1223c4cfc0cd473ad7539" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -595,9 +603,9 @@ dependencies = [ [[package]] name = "itertools" -version = "0.12.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0" +checksum = "ba291022dbbd398a455acf126c1e341954079855bc60dfdda641363bd6922569" dependencies = [ "either", ] @@ -608,20 +616,11 @@ version = "1.0.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1a46d1a171d865aa5f83f92695765caa047a9b4cbae2cbf37dbd613a793fd4c" -[[package]] -name = "jobserver" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37f63953c4c63420ed5fd3d6d398c719489b9f872b9fa683262f8edd363c7d" -dependencies = [ - "libc", -] - [[package]] name = "js-sys" -version = "0.3.67" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a1d36f1235bc969acba30b7f5990b864423a6068a10f7c90ae8f0112e3a59d1" +checksum = "406cda4b368d531c842222cf9d2600a9a4acce8d29423695379c6868a143a9ee" dependencies = [ "wasm-bindgen", ] @@ -640,9 +639,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.152" +version = "0.2.153" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13e3bf6590cbc649f4d1a3eefc9d5d6eb746f5200ffb04e5e142700b8faa56e7" +checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd" [[package]] name = "libloading" @@ -654,6 +653,12 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "libm" +version = "0.2.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ec2a862134d2a7d32d7983ddcdd1c4923530833c9f2ea1a44fc5fa473989058" + [[package]] name = "librocksdb-sys" version = "0.11.0+8.1.1" @@ -672,9 +677,9 @@ dependencies = [ [[package]] name = "libz-sys" -version = "1.1.14" +version = "1.1.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "295c17e837573c8c821dbaeb3cceb3d745ad082f7572191409e69cbc1b3fd050" +checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6" dependencies = [ "cc", "pkg-config", @@ -724,6 +729,16 @@ dependencies = [ "pkg-config", ] +[[package]] +name = "matrixmultiply" +version = "0.3.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7574c1cf36da4798ab73da5b215bbf444f50718207754cb522201d78d1cd0ff2" +dependencies = [ + "autocfg", + "rawpointer", +] + [[package]] name = "md5" version = "0.7.0" @@ -762,9 +777,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" [[package]] name = "miniz_oxide" -version = "0.7.1" +version = "0.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7" +checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7" dependencies = [ "adler", ] @@ -775,6 +790,35 @@ version = "0.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2983372caf4480544083767bf2d27defafe32af49ab4df3a0b7fc90793a3664" +[[package]] +name = "nalgebra" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d506eb7e08d6329505faa8a3a00a5dcc6de9f76e0c77e4b75763ae3c770831ff" +dependencies = [ + "approx", + "matrixmultiply", + "nalgebra-macros", + "num-complex", + "num-rational", + "num-traits", + "rand", + "rand_distr", + "simba", + "typenum", +] + +[[package]] +name = "nalgebra-macros" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01fcc0b8149b4632adc89ac3b7b31a12fb6099a0317a4eb2ebff574ef7de7218" +dependencies = [ + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "needletail" version = "0.5.1" @@ -826,21 +870,40 @@ version = "0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "61807f77802ff30975e01f4f071c8ba10c022052f98b3294119f3e615d13e5be" +[[package]] +name = "num-complex" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "23c6602fda94a57c990fe0df199a035d83576b496aa29f4e634a8ac6004e68a6" +dependencies = [ + "num-traits", +] + [[package]] name = "num-integer" -version = "0.1.45" +version = "0.1.46" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225d3389fb3509a24c93f5c29eb6bde2586b98d9f016636dff58d7c6f7569cd9" +checksum = "7969661fd2958a5cb096e56c8e1ad0444ac2bbcd0061bd28660485a44879858f" dependencies = [ - "autocfg", "num-traits", ] [[package]] name = "num-iter" -version = "0.1.43" +version = "0.1.44" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d869c01cc0c455284163fd0092f1f93835385ccab5a98a0dcc497b2f8bf055a9" +dependencies = [ + "autocfg", + "num-integer", + "num-traits", +] + +[[package]] +name = "num-rational" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d03e6c028c5dc5cac6e2dec0efda81fc887605bb3d884578bb6d6bf7514e252" +checksum = "0638a1c9d0a3c0914158145bc76cff373a75a627e6ecbfb71cbe6f453a5a19b0" dependencies = [ "autocfg", "num-integer", @@ -849,11 +912,12 @@ dependencies = [ [[package]] name = "num-traits" -version = "0.2.17" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39e3200413f237f41ab11ad6d161bc7239c84dcb631773ccd7de3dfe4b5c267c" +checksum = "da0df0e5185db44f69b44f26786fe401b6c293d1907744beaa7fa62b2e5a517a" dependencies = [ "autocfg", + "libm", ] [[package]] @@ -884,7 +948,7 @@ dependencies = [ "proc-macro2", "proc-macro2-diagnostics", "quote", - "syn 2.0.48", + "syn 2.0.50", ] [[package]] @@ -910,6 +974,12 @@ dependencies = [ "windows-targets 0.48.5", ] +[[package]] +name = "paste" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "de3145af08024dea9fa9914f381a17b8fc6034dfb00f3a84013f7ff43f29ed4c" + [[package]] name = "peeking_take_while" version = "0.1.2" @@ -934,9 +1004,9 @@ dependencies = [ [[package]] name = "pkg-config" -version = "0.3.29" +version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2900ede94e305130c13ddd391e0ab7cbaeb783945ae07a279c268cb05109c6cb" +checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" [[package]] name = "ppv-lite86" @@ -981,7 +1051,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a41cf62165e97c7f814d2221421dbb9afcbcdb0a88068e5ea206e19951c2cbb5" dependencies = [ "proc-macro2", - "syn 2.0.48", + "syn 2.0.50", ] [[package]] @@ -1034,7 +1104,7 @@ checksum = "af066a9c399a26e020ada66a034357a868728e72cd426f3adcd35f80d88d88c8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.50", "version_check", "yansi", ] @@ -1106,7 +1176,7 @@ dependencies = [ "proc-macro2", "pyo3-macros-backend", "quote", - "syn 2.0.48", + "syn 2.0.50", ] [[package]] @@ -1118,7 +1188,7 @@ dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.50", ] [[package]] @@ -1166,6 +1236,22 @@ dependencies = [ "getrandom", ] +[[package]] +name = "rand_distr" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" +dependencies = [ + "num-traits", + "rand", +] + +[[package]] +name = "rawpointer" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" + [[package]] name = "rayon" version = "1.8.1" @@ -1209,9 +1295,9 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.4" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b7fa1134405e2ec9353fd416b17f8dacd46c473d7d3fd1cf202706a14eb792a" +checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" dependencies = [ "aho-corasick", "memchr", @@ -1226,19 +1312,13 @@ checksum = "c08c74e62047bb2de4ff487b251e4a92e24f48745648451635cec7d591162d9f" [[package]] name = "rend" -version = "0.4.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a2571463863a6bd50c32f94402933f03457a3fbaf697a707c5be741e459f08fd" +checksum = "71fe3824f5629716b1589be05dacd749f6aa084c87e00e016714a8cdfccc997c" dependencies = [ "bytecheck", ] -[[package]] -name = "retain_mut" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c31b5c4033f8fdde8700e4657be2c497e7288f01515be52168c631e2e4d4086" - [[package]] name = "rkyv" version = "0.7.44" @@ -1270,13 +1350,12 @@ dependencies = [ [[package]] name = "roaring" -version = "0.10.2" +version = "0.10.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6106b5cf8587f5834158895e9715a3c6c9716c8aefab57f1f7680917191c7873" +checksum = "a1c77081a55300e016cb86f2864415b7518741879db925b8d488a0ee0d2da6bf" dependencies = [ "bytemuck", "byteorder", - "retain_mut", ] [[package]] @@ -1289,6 +1368,12 @@ dependencies = [ "librocksdb-sys", ] +[[package]] +name = "roots" +version = "0.0.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "082f11ffa03bbef6c2c6ea6bea1acafaade2fd9050ae0234ab44a2153742b058" + [[package]] name = "rustc-hash" version = "1.1.0" @@ -1310,9 +1395,18 @@ dependencies = [ [[package]] name = "ryu" -version = "1.0.16" +version = "1.0.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" +checksum = "e86697c916019a8588c99b5fac3cead74ec0b4b819707a682fd4d23fa0ce1ba1" + +[[package]] +name = "safe_arch" +version = "0.7.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f398075ce1e6a179b46f51bd88d0598b92b00d3551f1a2d4ac49e771b56ac354" +dependencies = [ + "bytemuck", +] [[package]] name = "safemem" @@ -1334,29 +1428,29 @@ checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" [[package]] name = "serde" -version = "1.0.196" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "870026e60fa08c69f064aa766c10f10b1d62db9ccd4d0abb206472bee0ce3b32" +checksum = "3fb1c873e1b9b056a4dc4c0c198b24c3ffa059243875552b2bd0933b1aee4ce2" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.196" +version = "1.0.197" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33c85360c95e7d137454dc81d9a4ed2b8efd8fbe19cee57357b32b9771fccb67" +checksum = "7eb0b34b42edc17f6b7cac84a52a1c5f0e1bb2227e997ca9011ea3dd34e8610b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.50", ] [[package]] name = "serde_json" -version = "1.0.113" +version = "1.0.114" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69801b70b1c3dac963ecb03a364ba0ceda9cf60c71cfe475e99864759c8b8a79" +checksum = "c5f09b1bd632ef549eaa9f60a1f8de742bdbc698e6cee2095fc84dde5f549ae0" dependencies = [ "itoa", "ryu", @@ -1369,6 +1463,19 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "simba" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0b7840f121a46d63066ee7a99fc81dcabbc6105e437cae43528cea199b5a05f" +dependencies = [ + "approx", + "num-complex", + "num-traits", + "paste", + "wide", +] + [[package]] name = "simdutf8" version = "0.1.4" @@ -1395,9 +1502,9 @@ checksum = "bceb57dc07c92cdae60f5b27b3fa92ecaaa42fe36c55e22dbfb0b44893e0b1f7" [[package]] name = "sourmash" -version = "0.12.1" +version = "0.13.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa8187a00787432261dc522b6ebf813251dbbeabc04ed7a47f5cbb9be0d4a508" +checksum = "ae9e413cb7387bbb4405e960920e5d8c5f255ec4a86f021a18a455014565e749" dependencies = [ "az", "byteorder", @@ -1411,6 +1518,7 @@ dependencies = [ "getrandom", "getset", "histogram", + "itertools", "log", "md5", "memmap2", @@ -1426,8 +1534,11 @@ dependencies = [ "rkyv", "roaring", "rocksdb", + "roots", "serde", "serde_json", + "statrs", + "streaming-stats", "thiserror", "twox-hash", "typed-builder", @@ -1466,6 +1577,28 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" +[[package]] +name = "statrs" +version = "0.16.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d08e5e1748192713cc281da8b16924fb46be7b0c2431854eadc785823e5696e" +dependencies = [ + "approx", + "lazy_static", + "nalgebra", + "num-traits", + "rand", +] + +[[package]] +name = "streaming-stats" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b0d670ce4e348a2081843569e0f79b21c99c91bb9028b3b3ecb0f050306de547" +dependencies = [ + "num-traits", +] + [[package]] name = "syn" version = "1.0.109" @@ -1479,9 +1612,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.48" +version = "2.0.50" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f3531638e407dfc0814761abb7c00a5b54992b849452a0646b7f65c9f770f3f" +checksum = "74f1bdc9872430ce9b75da68329d1c1746faf50ffac5f19e02b71e37ff881ffb" dependencies = [ "proc-macro2", "quote", @@ -1529,22 +1662,22 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "thiserror" -version = "1.0.56" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d54378c645627613241d077a3a79db965db602882668f9136ac42af9ecb730ad" +checksum = "1e45bcbe8ed29775f228095caf2cd67af7a4ccf756ebff23a306bf3e8b47b24b" dependencies = [ "thiserror-impl", ] [[package]] name = "thiserror-impl" -version = "1.0.56" +version = "1.0.57" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fa0faa943b50f3db30a20aa7e265dbc66076993efed8463e8de414e5d06d3471" +checksum = "a953cb265bef375dae3de6663da4d3804eee9682ea80d8e2542529b73c531c81" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.50", ] [[package]] @@ -1590,9 +1723,15 @@ checksum = "563b3b88238ec95680aef36bdece66896eaa7ce3c0f1b4f39d38fb2435261352" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.50", ] +[[package]] +name = "typenum" +version = "1.17.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" + [[package]] name = "unicode-ident" version = "1.0.12" @@ -1655,9 +1794,9 @@ checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" [[package]] name = "wasm-bindgen" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1223296a201415c7fad14792dbefaace9bd52b62d33453ade1c5b5f07555406" +checksum = "c1e124130aee3fb58c5bdd6b639a0509486b0338acaaae0c84a5124b0f588b7f" dependencies = [ "cfg-if", "serde", @@ -1667,24 +1806,24 @@ dependencies = [ [[package]] name = "wasm-bindgen-backend" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fcdc935b63408d58a32f8cc9738a0bffd8f05cc7c002086c6ef20b7312ad9dcd" +checksum = "c9e7e1900c352b609c8488ad12639a311045f40a35491fb69ba8c12f758af70b" dependencies = [ "bumpalo", "log", "once_cell", "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.50", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e4c238561b2d428924c49815533a8b9121c664599558a5d9ec51f8a1740a999" +checksum = "b30af9e2d358182b5c7449424f017eba305ed32a7010509ede96cdc4696c46ed" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -1692,33 +1831,43 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bae1abb6806dc1ad9e560ed242107c0f6c84335f1749dd4e8ddb012ebd5e25a7" +checksum = "642f325be6301eb8107a83d12a8ac6c1e1c54345a7ef1a9261962dfefda09e66" dependencies = [ "proc-macro2", "quote", - "syn 2.0.48", + "syn 2.0.50", "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.90" +version = "0.2.91" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4d91413b1c31d7539ba5ef2451af3f0b833a005eb27a631cec32bc0635a8602b" +checksum = "4f186bd2dcf04330886ce82d6f33dd75a7bfcf69ecf5763b89fcde53b6ac9838" [[package]] name = "web-sys" -version = "0.3.67" +version = "0.3.68" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "58cd2333b6e0be7a39605f0e255892fd7418a682d8da8fe042fe25128794d2ed" +checksum = "96565907687f7aceb35bc5fc03770a8a0471d82e479f25832f54a0e3f4b28446" dependencies = [ "js-sys", "wasm-bindgen", ] +[[package]] +name = "wide" +version = "0.7.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "89beec544f246e679fc25490e3f8e08003bc4bf612068f325120dad4cea02c1c" +dependencies = [ + "bytemuck", + "safe_arch", +] + [[package]] name = "winapi" version = "0.3.9" diff --git a/Cargo.toml b/Cargo.toml index 0b2cc497..5a607980 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ crate-type = ["cdylib"] pyo3 = { version = "0.20.2", features = ["extension-module", "anyhow"] } rayon = "1.8.1" serde = { version = "1.0.196", features = ["derive"] } -sourmash = { version = "0.12.1", features = ["branchwater"] } +sourmash = { version = "0.13.0", features = ["branchwater"] } serde_json = "1.0.113" niffler = "2.4.0" log = "0.4.14" diff --git a/src/check.rs b/src/check.rs index 2995284b..5ffc0ff5 100644 --- a/src/check.rs +++ b/src/check.rs @@ -8,7 +8,7 @@ pub fn check(index: camino::Utf8PathBuf, quick: bool) -> Result<(), Box Result, String> { Ok(unique_params.into_iter().collect()) } -fn build_siginfo( - params: &[Params], - moltype: &str, - name: &str, - filename: &Path, -) -> (Vec, Vec) { +fn build_siginfo(params: &[Params], moltype: &str) -> Vec { let mut sigs = Vec::new(); - let mut params_vec = Vec::new(); for param in params.iter().cloned() { match moltype { @@ -112,20 +106,11 @@ fn build_siginfo( .track_abundance(param.track_abundance) .build(); - // let sig = Signature::from_params(&cp); // cant set name with this - let template = sourmash::cmd::build_template(&cp); - let sig = Signature::builder() - .hash_function("0.murmur64") - .name(Some(name.to_string())) - .filename(Some(filename.to_string())) - .signatures(template) - .build(); + let sig = Signature::from_params(&cp); sigs.push(sig); - - params_vec.push(param); } - (sigs, params_vec) + sigs } pub fn manysketch( @@ -144,7 +129,7 @@ pub fn manysketch( bail!("No files to load, exiting."); } - // if output doesnt end in zip, bail + // if output doesn't end in zip, bail if Path::new(&output) .extension() .map_or(true, |ext| ext != "zip") @@ -195,7 +180,7 @@ pub fn manysketch( } // build sig templates from params - let (mut sigs, sig_params) = build_siginfo(¶ms_vec, moltype, name, filename); + let mut sigs = build_siginfo(¶ms_vec, moltype); // if no sigs to build, skip if sigs.is_empty() { let _ = skipped_paths.fetch_add(1, atomic::Ordering::SeqCst); @@ -212,36 +197,38 @@ pub fn manysketch( } }; // parse fasta and add to signature + let mut set_name = false; while let Some(record_result) = reader.next() { match record_result { Ok(record) => { // do we need to normalize to make sure all the bases are consistently capitalized? // let norm_seq = record.normalize(false); - for sig in &mut sigs { + sigs.iter_mut().for_each(|sig| { + if !set_name { + sig.set_name(name); + sig.set_filename(filename.as_str()); + set_name = true; + }; if moltype == "protein" { - sig.add_protein(&record.seq()).unwrap(); + sig.add_protein(&record.seq()) + .expect("Failed to add protein"); } else { - sig.add_sequence(&record.seq(), true).unwrap(); + sig.add_sequence(&record.seq(), true) + .expect("Failed to add sequence"); // if not force, panics with 'N' in dna sequence } - } - } - Err(err) => { - eprintln!("Error while processing record: {:?}", err); + }); } + Err(err) => eprintln!("Error while processing record: {:?}", err), } } - Some((sigs, sig_params, filename)) + + Some(sigs) }) .try_for_each_with( send.clone(), - |s: &mut std::sync::Arc>, - (sigs, sig_params, filename)| { - if let Err(e) = s.send(ZipMessage::SignatureData( - sigs, - sig_params, - filename.clone(), - )) { + |s: &mut std::sync::Arc>, sigs| { + if let Err(e) = s.send(ZipMessage::SignatureData(sigs)) { Err(format!("Unable to send internal data: {:?}", e)) } else { Ok(()) diff --git a/src/mastiff_manygather.rs b/src/mastiff_manygather.rs index cb794735..f5a029c0 100644 --- a/src/mastiff_manygather.rs +++ b/src/mastiff_manygather.rs @@ -23,7 +23,7 @@ pub fn mastiff_manygather( bail!("'{}' is not a valid RevIndex database", index); } // Open database once - let db = RevIndex::open(index, true)?; + let db = RevIndex::open(index, true, None)?; println!("Loaded DB"); let query_collection = load_collection( diff --git a/src/mastiff_manysearch.rs b/src/mastiff_manysearch.rs index 7d793b2c..4f4be0c5 100644 --- a/src/mastiff_manysearch.rs +++ b/src/mastiff_manysearch.rs @@ -24,7 +24,7 @@ pub fn mastiff_manysearch( bail!("'{}' is not a valid RevIndex database", index); } // Open database once - let db = RevIndex::open(index, true)?; + let db = RevIndex::open(index, true, None)?; println!("Loaded DB"); diff --git a/src/python/tests/test_sketch.py b/src/python/tests/test_sketch.py index fdbad0b9..88c99dcf 100644 --- a/src/python/tests/test_sketch.py +++ b/src/python/tests/test_sketch.py @@ -13,11 +13,14 @@ def get_test_data(filename): def make_file_csv(filename, genome_paths, protein_paths = []): + # equalize path lengths by adding "". names = [os.path.basename(x).split('.fa')[0] for x in genome_paths] - # Check if the number of protein paths is less than genome paths - # and fill in the missing paths with "". if len(protein_paths) < len(genome_paths): protein_paths.extend(["" for _ in range(len(genome_paths) - len(protein_paths))]) + elif len(genome_paths) < len(protein_paths): + genome_paths.extend(["" for _ in range(len(protein_paths) - len(genome_paths))]) + names = [os.path.basename(x).split('.fa')[0] for x in protein_paths] + with open(filename, 'wt') as fp: fp.write("name,genome_filename,protein_filename\n") for name, genome_path, protein_path in zip(names, genome_paths, protein_paths): diff --git a/src/utils.rs b/src/utils.rs index f4ed1727..e0d01b71 100644 --- a/src/utils.rs +++ b/src/utils.rs @@ -1,7 +1,6 @@ /// Utility functions for sourmash_plugin_branchwater. use rayon::prelude::*; use sourmash::encodings::HashFunctions; -use sourmash::manifest::Manifest; use sourmash::selection::Select; use anyhow::{anyhow, Context, Result}; @@ -19,12 +18,11 @@ use std::sync::atomic; use std::sync::atomic::AtomicUsize; use sourmash::collection::Collection; -use sourmash::manifest::Record; +use sourmash::manifest::{Manifest, Record}; use sourmash::selection::Selection; use sourmash::signature::{Signature, SigsTrait}; use sourmash::sketch::minhash::KmerMinHash; use sourmash::storage::{FSStorage, InnerStorage, SigStore}; - /// Track a name/minhash. pub struct SmallSignature { @@ -377,11 +375,11 @@ fn collection_from_pathlist( let n_failed = AtomicUsize::new(0); let records: Vec = lines .par_iter() - .filter_map(|path| match Signature::from_path(&path) { + .filter_map(|path| match Signature::from_path(path) { Ok(signatures) => { let recs: Vec = signatures .into_iter() - .flat_map(|v| Record::from_sig(&v, &path)) + .flat_map(|v| Record::from_sig(&v, path)) .collect(); Some(recs) } @@ -760,47 +758,6 @@ impl Serialize for BoolPython { } } -pub fn make_manifest_row( - sig: &Signature, - filename: &Path, - internal_location: &str, - scaled: u64, - num: u32, - abund: bool, - is_dna: bool, - is_protein: bool, -) -> ManifestRow { - if is_dna && is_protein { - panic!("Both is_dna and is_protein cannot be true at the same time."); - } else if !is_dna && !is_protein { - panic!("Either is_dna or is_protein must be true."); - } - let moltype = if is_dna { - "DNA".to_string() - } else { - "protein".to_string() - }; - let sketch = &sig.sketches()[0]; - let ksize: u32 = if is_dna { - sketch.ksize() as u32 - } else { - sketch.ksize() as u32 / 3 - }; - ManifestRow { - internal_location: internal_location.to_string(), - md5: sig.md5sum(), - md5short: sig.md5sum()[0..8].to_string(), - ksize: ksize, - moltype, - num, - scaled, - n_hashes: sketch.size(), - with_abundance: BoolPython(abund), - name: sig.name().to_string(), - filename: filename.to_string(), - } -} - pub fn open_stdout_or_file(output: Option) -> Box { // if output is a file, use open_output_file if let Some(path) = output { @@ -845,7 +802,7 @@ impl Hash for Params { } pub enum ZipMessage { - SignatureData(Vec, Vec, PathBuf), + SignatureData(Vec), WriteManifest, } @@ -863,18 +820,15 @@ pub fn sigwriter( .compression_method(zip::CompressionMethod::Stored) .large_file(true); let mut zip = zip::ZipWriter::new(file_writer); - let mut manifest_rows: Vec = Vec::new(); + let mut manifest_rows: Vec = Vec::new(); // keep track of md5sum occurrences to prevent overwriting duplicates let mut md5sum_occurrences: std::collections::HashMap = std::collections::HashMap::new(); while let Ok(message) = recv.recv() { match message { - ZipMessage::SignatureData(sigs, params, filename) => { - if sigs.len() != params.len() { - bail!("Mismatched lengths of signatures and parameters"); - } - for (sig, param) in sigs.iter().zip(params.iter()) { + ZipMessage::SignatureData(sigs) => { + for sig in sigs.iter() { let md5sum_str = sig.md5sum(); let count = md5sum_occurrences.entry(md5sum_str.clone()).or_insert(0); *count += 1; @@ -884,38 +838,16 @@ pub fn sigwriter( format!("signatures/{}.sig.gz", md5sum_str) }; write_signature(sig, &mut zip, options, &sig_filename); - manifest_rows.push(make_manifest_row( - sig, - &filename, - &sig_filename, - param.scaled, - param.num, - param.track_abundance, - param.is_dna, - param.is_protein, - )); + let records: Vec = Record::from_sig(sig, sig_filename.as_str()); + manifest_rows.extend(records); } } ZipMessage::WriteManifest => { println!("Writing manifest"); // Start the CSV file inside the zip zip.start_file("SOURMASH-MANIFEST.csv", options).unwrap(); - // write manifest version line - writeln!(&mut zip, "# SOURMASH-MANIFEST-VERSION: 1.0").unwrap(); - // scoped block for csv writing - { - let mut csv_writer = Writer::from_writer(&mut zip); - - for row in &manifest_rows { - if let Err(e) = csv_writer.serialize(row) { - eprintln!("Error writing item: {:?}", e); - } - } - // CSV writer must be manually flushed to ensure all data is written - if let Err(e) = csv_writer.flush() { - eprintln!("Error flushing CSV writer: {:?}", e); - } - } // drop csv writer here + let manifest: Manifest = manifest_rows.clone().into(); + manifest.to_writer(&mut zip)?; // Properly finish writing to the ZIP file if let Err(e) = zip.finish() {