Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Reenable salmon tests #1366

Merged
merged 6 commits into from
Jul 2, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 2 additions & 7 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,7 @@ workers/test_volume/*
!workers/test_volume/raw/TEST/TRANSCRIPTOME_INDEX/aegilops_tauschii_short.gtf.gz
!workers/test_volume/raw/TEST/NO_OP/test.txt

# Trying to ignore most of a directory tree but not a specific subfolder is tricky:
# https://stackoverflow.com/questions/5533050/gitignore-exclude-folder-but-include-specific-subfolder
!workers/test_volume/salmon_tests
workers/test_volume/salmon_tests/*
!workers/test_volume/salmon_tests/SRP095529/
!workers/test_volume/salmon_tests/ZEBRAFISH_INDEX
workers/test_volume/salmon_tests/


# Byte-compiled / optimized / DLL files
Expand Down Expand Up @@ -139,4 +134,4 @@ dr_env/
# emacs backup files
*~

.vscode
.vscode
2 changes: 1 addition & 1 deletion workers/data_refinery_workers/processors/salmon.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,7 +408,7 @@ def _find_or_download_index(job_context: Dict) -> Dict:
except Exception as e:
error_template = "Failed to download or extract transcriptome index for organism {0}: {1}"
error_message = error_template.format(str(job_context['organism']), str(e))
logger.error(error_message, processor_job=job_context["job_id"])
logger.exception(error_message, processor_job=job_context["job_id"])
job_context["job"].failure_reason = error_message
job_context["success"] = False

Expand Down
13 changes: 3 additions & 10 deletions workers/data_refinery_workers/processors/test_salmon.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,13 +268,6 @@ def check_salmon_quant(self, job_context, sample_dir):
job_context = salmon._determine_index_length(job_context)
job_context = salmon._find_or_download_index(job_context)

# This is a brittle/hacky patch.
# However I am unsure why the double_reads reads are
# determined to be short but require a long index to be
# processed successfully.
if "test_experiment" in sample_dir:
job_context["index_directory"] = job_context["index_directory"].replace("SHORT", "LONG")

job_context = salmon._run_salmon(job_context)
job_context = salmon.get_tximport_inputs(job_context)
job_context = salmon.tximport(job_context)
Expand Down Expand Up @@ -767,7 +760,7 @@ def run_tximport_at_progress_point(complete_accessions: List[str], incomplete_ac
"""
# Create the experiment
experiment_accession = 'SRP095529'
data_dir = '/home/user/data_store/salmon_tests/'
data_dir = '/home/user/data_store/'
experiment_dir = data_dir + experiment_accession
experiment = Experiment.objects.create(accession_code=experiment_accession)

Expand All @@ -781,12 +774,12 @@ def run_tximport_at_progress_point(complete_accessions: List[str], incomplete_ac
organism_index.index_type = "TRANSCRIPTOME_SHORT"
organism_index.organism = zebrafish
organism_index.result = computational_result_short
organism_index.absolute_directory_path = "/home/user/data_store/salmon_tests/ZEBRAFISH_INDEX/SHORT"
organism_index.absolute_directory_path = "/home/user/data_store/ZEBRAFISH_INDEX/SHORT"
organism_index.save()

comp_file = ComputedFile()
# This path will not be used because we already have the files extracted.
comp_file.absolute_file_path = "/home/user/data_store/salmon_tests/ZEBRAFISH_INDEX/SHORT/zebrafish_short.tar.gz"
comp_file.absolute_file_path = "/home/user/data_store/ZEBRAFISH_INDEX/SHORT/zebrafish_short.tar.gz"
comp_file.result = computational_result_short
comp_file.size_in_bytes=1337
comp_file.sha1="ABC"
Expand Down
15 changes: 7 additions & 8 deletions workers/run_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -61,12 +61,14 @@ fi
test_data_repo="https://s3.amazonaws.com/data-refinery-test-assets"

if [[ -z $tag || $tag == "salmon" ]]; then
# Download "salmon quant" test data
    # Download "salmon quant" test data. The `newer` file was to
    # signify that we are using updated data. However, the data has been
    # updated again, so now we need to go back to checking to make sure
    # that it's not there so we know we have even NEWER data.
if [[ ! -e $volume_directory/salmon_tests || -e $volume_directory/salmon_tests/newer ]]; then
# Remove the data that comes from S3 so anything old is blown away.
rm -rf $volume_directory/salmon_tests

# TODO: rename the test_data_new to test_data and remove check for
# the new file. These are here temporarily so other branches'
# tests don't break.
if [[ ! -e $volume_directory/salmon_tests || ! -e $volume_directory/salmon_tests/newer ]]; then
echo "Downloading 'salmon quant' test data..."
wget -q -O $volume_directory/salmon_tests.tar.gz $test_data_repo/salmon_tests_newer.tar.gz
tar xzf $volume_directory/salmon_tests.tar.gz -C $volume_directory
Expand Down Expand Up @@ -461,9 +463,6 @@ for image in ${worker_images[*]}; do
elif [[ $tag == "janitor" ]]; then
./prepare_image.sh -i smasher -s workers
image_name=ccdlstaging/dr_smasher
elif [[ $tag == "salmon" ]]; then
# ignore salmon tests temporarily
continue
else
./prepare_image.sh -i $image -s workers
image_name=ccdlstaging/dr_$image
Expand Down