From 8165481b24565ad3df12739ed67063977011630c Mon Sep 17 00:00:00 2001 From: bencomp Date: Fri, 8 Apr 2016 18:05:18 +0200 Subject: [PATCH 01/37] Correct pom.xml indentation --- pom.xml | 285 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 143 insertions(+), 142 deletions(-) diff --git a/pom.xml b/pom.xml index e15f5a87529..c8c46e87396 100644 --- a/pom.xml +++ b/pom.xml @@ -16,30 +16,30 @@ - prime-repo - PrimeFaces Maven Repository - http://repository.primefaces.org - default + prime-repo + PrimeFaces Maven Repository + http://repository.primefaces.org + default - - - geotk-repo - Geo Toolkit Maven Repository - http://maven.geotoolkit.org - default - - - central-repo - Central Repository - http://repo1.maven.org/maven2 - default - - + + + geotk-repo + Geo Toolkit Maven Repository + http://maven.geotoolkit.org + default + + + central-repo + Central Repository + http://repo1.maven.org/maven2 + default + + dvn.private Local repository for hosting jars not available from network repositories. file://${project.basedir}/local_lib - + dataone.org http://dev-testing.dataone.org/maven @@ -51,9 +51,9 @@ - + - + junit junit 4.8.1 @@ -117,7 +117,8 @@ gson 2.2.4 compile - + + xom xom @@ -158,20 +159,20 @@ 1.0.10 - org.atmosphere - atmosphere-runtime - 2.4.2 - + org.atmosphere + atmosphere-runtime + 2.4.2 + org.omnifaces omnifaces 1.7 - - - org.hibernate - hibernate-validator - 5.0.3.Final - + + + org.hibernate + hibernate-validator + 5.0.3.Final + commons-lang commons-lang @@ -188,103 +189,103 @@ commons-logging 1.1.3 - - org.apache.commons - commons-math - 2.2 - - - commons-validator - commons-validator - 1.4.0 - - - colt - colt - 1.2.0 - - - - nom.tam.fits - fits - 2012-10-25-generated - - + + org.apache.commons + commons-math + 2.2 + + + commons-validator + commons-validator + 1.4.0 + + + colt + colt + 1.2.0 + + + + nom.tam.fits + fits + 2012-10-25-generated + + net.handle handle 2006-06-16-generated - - - edu.harvard.iq.dvn - unf5 - 5.0 - - - - org.dataverse - unf - 6.0 - - - - - org.nuiton.thirdparty - REngine - 0.6-1 - - - org.nuiton.thirdparty - Rserve - 0.6-1 - - - - org.apache.poi - poi - 3.10-FINAL - - - org.apache.poi - poi-ooxml - 3.10-FINAL - - - org.apache.poi - poi-examples - 3.10-FINAL - - - edu.harvard.hul.ois.jhove - jhove - 1.11.0 - - - edu.harvard.hul.ois.jhove - jhove-module - 1.11.0 - - - edu.harvard.hul.ois.jhove - jhove-handler - 1.11.0 - - - - javax.media - jai_imageio - 1.1.1 - - - javax.media - jai_core - 1.1.3 - - - javax.media - jai_codec - 1.1.3 - + + + edu.harvard.iq.dvn + unf5 + 5.0 + + + + org.dataverse + unf + 6.0 + + + + + org.nuiton.thirdparty + REngine + 0.6-1 + + + org.nuiton.thirdparty + Rserve + 0.6-1 + + + + org.apache.poi + poi + 3.10-FINAL + + + org.apache.poi + poi-ooxml + 3.10-FINAL + + + org.apache.poi + poi-examples + 3.10-FINAL + + + edu.harvard.hul.ois.jhove + jhove + 1.11.0 + + + edu.harvard.hul.ois.jhove + jhove-module + 1.11.0 + + + edu.harvard.hul.ois.jhove + jhove-handler + 1.11.0 + + + + javax.media + jai_imageio + 1.1.1 + + + javax.media + jai_core + 1.1.3 + + + javax.media + jai_codec + 1.1.3 + org.ocpsoft.rewrite rewrite-servlet @@ -295,8 +296,8 @@ rewrite-config-prettyfaces 2.0.12.Final - - edu.ucsb.nceas + + edu.ucsb.nceas ezid 1.0.0 jar @@ -368,7 +369,7 @@ log4j 1.2.17 - + @@ -376,8 +377,8 @@ src/main/java - *.properties - **/mime.types + *.properties + **/mime.types **/*.R @@ -408,12 +409,12 @@ maven-jar-plugin 2.3 - - - true - true - - + + + true + true + + @@ -422,12 +423,12 @@ 2.3 false - - - true - true - - + + + 
true + true + + From 2b7232d72e5221514c5e4455922d81dd4becf6ca Mon Sep 17 00:00:00 2001 From: bmckinney Date: Mon, 11 Apr 2016 12:57:13 -0400 Subject: [PATCH 02/37] adds "Laboratory" and "Research Group" to list of Dataverse categories --- src/main/java/Bundle.properties | 2 ++ src/main/java/edu/harvard/iq/dataverse/Dataverse.java | 8 ++++++-- src/main/webapp/dataverse.xhtml | 2 ++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/main/java/Bundle.properties b/src/main/java/Bundle.properties index 5a6d571d06e..977f4c0ee9d 100755 --- a/src/main/java/Bundle.properties +++ b/src/main/java/Bundle.properties @@ -270,6 +270,8 @@ dataverse.type.selectTab.journals=Journal dataverse.type.selectTab.organizationsAndInsitutions=Organization or Institution dataverse.type.selectTab.teachingCourses=Teaching Course dataverse.type.selectTab.uncategorized=Uncategorized +dataverse.type.selectTab.researchGroup=Research Group +dataverse.type.selectTab.laboratory=Laboratory dataverse.description.title=A summary describing the purpose, nature, or scope of this dataverse. dataverse.email=Email diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index b97d3402f81..44ab7c59f7e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -56,7 +56,7 @@ public class Dataverse extends DvObjectContainer { public enum DataverseType { - RESEARCHERS, RESEARCH_PROJECTS, JOURNALS, ORGANIZATIONS_INSTITUTIONS, TEACHING_COURSES, UNCATEGORIZED + RESEARCHERS, RESEARCH_PROJECTS, JOURNALS, ORGANIZATIONS_INSTITUTIONS, TEACHING_COURSES, UNCATEGORIZED, LABORATORY, RESEARCH_GROUP }; private static final long serialVersionUID = 1L; @@ -115,7 +115,11 @@ public String getFriendlyCategoryName(){ case ORGANIZATIONS_INSTITUTIONS: return "Organization or Institution"; case TEACHING_COURSES: - return "Teaching Course"; + return "Teaching Course"; + case LABORATORY: + return "Laboratory"; + case RESEARCH_GROUP: + return "Research Group"; case UNCATEGORIZED: return uncategorizedString; default: diff --git a/src/main/webapp/dataverse.xhtml b/src/main/webapp/dataverse.xhtml index 2992ca8c376..8a1100366be 100644 --- a/src/main/webapp/dataverse.xhtml +++ b/src/main/webapp/dataverse.xhtml @@ -122,6 +122,8 @@ + + From 6f0583f8fe2e24ea784785c15ea535605ebaee4d Mon Sep 17 00:00:00 2001 From: Eleni Castro Date: Thu, 14 Apr 2016 21:57:32 -0400 Subject: [PATCH 03/37] update tsv files #3073 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change from “showabovefold” to more accurate “displayoncreate” #3073 --- scripts/api/data/metadatablocks/astrophysics.tsv | 2 +- scripts/api/data/metadatablocks/biomedical.tsv | 2 +- scripts/api/data/metadatablocks/citation.tsv | 2 +- scripts/api/data/metadatablocks/customARCS.tsv | 2 +- scripts/api/data/metadatablocks/customCHIA.tsv | 2 +- scripts/api/data/metadatablocks/customDigaai.tsv | 2 +- scripts/api/data/metadatablocks/customGSD.tsv | 2 +- scripts/api/data/metadatablocks/customMRA.tsv | 2 +- scripts/api/data/metadatablocks/customPSI.tsv | 2 +- scripts/api/data/metadatablocks/customPSRI.tsv | 2 +- scripts/api/data/metadatablocks/custom_hbgdki.tsv | 2 +- scripts/api/data/metadatablocks/geospatial.tsv | 2 +- scripts/api/data/metadatablocks/journals.tsv | 2 +- scripts/api/data/metadatablocks/social_science.tsv | 2 +- 14 files changed, 14 insertions(+), 14 deletions(-) diff --git 
a/scripts/api/data/metadatablocks/astrophysics.tsv b/scripts/api/data/metadatablocks/astrophysics.tsv index 755cc392147..d6266d239b2 100644 --- a/scripts/api/data/metadatablocks/astrophysics.tsv +++ b/scripts/api/data/metadatablocks/astrophysics.tsv @@ -1,6 +1,6 @@ #metadataBlock name dataverseAlias displayName astrophysics Astronomy and Astrophysics Metadata -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable showabovefold required parent metadatablock_id +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id astroType Type The nature or genre of the content of the files in the dataset. text 0 TRUE TRUE TRUE TRUE FALSE FALSE astrophysics astroFacility Facility The observatory or facility where the data was obtained. text 1 TRUE TRUE TRUE TRUE FALSE FALSE astrophysics astroInstrument Instrument The instrument used to collect the data. text 2 TRUE TRUE TRUE TRUE FALSE FALSE astrophysics diff --git a/scripts/api/data/metadatablocks/biomedical.tsv b/scripts/api/data/metadatablocks/biomedical.tsv index db24a804443..f45c5849845 100644 --- a/scripts/api/data/metadatablocks/biomedical.tsv +++ b/scripts/api/data/metadatablocks/biomedical.tsv @@ -1,6 +1,6 @@ #metadataBlock name dataverseAlias displayName biomedical Life Sciences Metadata -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable showabovefold required parent metadatablock_id +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id studyDesignType Design Type Design types that are based on the overall experimental design. text 0 TRUE TRUE TRUE TRUE FALSE FALSE biomedical studyFactorType Factor Type Factors used in the Dataset. text 1 TRUE TRUE TRUE TRUE FALSE FALSE biomedical studyAssayOrganism Organism The taxonomic name of the organism used in the Dataset or from which the starting biological material derives. text 2 TRUE TRUE TRUE TRUE FALSE FALSE biomedical diff --git a/scripts/api/data/metadatablocks/citation.tsv b/scripts/api/data/metadatablocks/citation.tsv index 322cd558bfe..7ea1330c8cb 100644 --- a/scripts/api/data/metadatablocks/citation.tsv +++ b/scripts/api/data/metadatablocks/citation.tsv @@ -1,6 +1,6 @@ #metadataBlock name dataverseAlias displayName citation Citation Metadata -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable showabovefold required parent metadatablock_id +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id title Title Full title by which the Dataset is known. Enter title... text 0 TRUE FALSE FALSE FALSE TRUE TRUE citation subtitle Subtitle A secondary title used to amplify or state certain limitations on the main title. text 1 FALSE FALSE FALSE FALSE FALSE FALSE citation alternativeTitle Alternative Title A title by which the work is commonly referred, or an abbreviation of the title. 
text 2 FALSE FALSE FALSE FALSE FALSE FALSE citation diff --git a/scripts/api/data/metadatablocks/customARCS.tsv b/scripts/api/data/metadatablocks/customARCS.tsv index f74c1073694..e287349b830 100644 --- a/scripts/api/data/metadatablocks/customARCS.tsv +++ b/scripts/api/data/metadatablocks/customARCS.tsv @@ -1,6 +1,6 @@ #metadataBlock name dataverseAlias displayName customARCS Alliance for Research on Corporate Sustainability Metadata -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable showabovefold required parent metadatablock_id +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id ARCS1 1) Were any of these data sets a) purchased, b) obtained through licensed databases, or c) provided by an organization under a nondisclosure or other agreement? Licensed agreement of deposited data. text 0 FALSE TRUE FALSE FALSE FALSE FALSE customARCS ARCS2 2) If you responded Yes to Q1, have you ensured that sharing the data does not violate terms of the agreement? If you responded No to Q1, please enter N/A here. Data sharing does not violate terms. text 1 FALSE TRUE FALSE FALSE FALSE FALSE customARCS ARCS3 3) Do any of these data sets include individual-level data (either collected or pre-existing in the dataset) that might make them subject to U.S. or international human subjects considerations? Human subjects consideration. text 2 FALSE TRUE FALSE FALSE FALSE FALSE customARCS diff --git a/scripts/api/data/metadatablocks/customCHIA.tsv b/scripts/api/data/metadatablocks/customCHIA.tsv index c2d52cf164f..255981c5418 100644 --- a/scripts/api/data/metadatablocks/customCHIA.tsv +++ b/scripts/api/data/metadatablocks/customCHIA.tsv @@ -1,6 +1,6 @@ #metadataBlock name dataverseAlias displayName customCHIA CHIA Metadata -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable showabovefold required parent metadatablock_id +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id sourceCHIA Source Source - This describes the source of the data. Is it from the Bureau of Labor and Statistics? Is it data from the United Nations? text 0 TRUE FALSE FALSE TRUE FALSE FALSE customCHIA datesAdditionalInformationCHIA Dates - Additional Information Dates - Additional Information - Note any additional information about dates or time periods in the dataset including intervals (annual, decennial, centennial, etc.) Also note the column(s) in the dataset where dates and other temporal information can be found. text 1 TRUE FALSE FALSE FALSE FALSE FALSE customCHIA variablesCHIA Variables Variables - Define the variables in this dataset. Please note the column in the dataset where variable information can be found. 
textbox 2 TRUE FALSE FALSE FALSE FALSE FALSE customCHIA diff --git a/scripts/api/data/metadatablocks/customDigaai.tsv b/scripts/api/data/metadatablocks/customDigaai.tsv index e419e93747e..fac077e201c 100644 --- a/scripts/api/data/metadatablocks/customDigaai.tsv +++ b/scripts/api/data/metadatablocks/customDigaai.tsv @@ -1,6 +1,6 @@ #metadataBlock name dataverseAlias displayName customDigaai Digaai Metadata -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable showabovefold required parent metadatablock_id +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id titulo Título Título do jornal ou revista. text 0 TRUE TRUE TRUE TRUE FALSE FALSE customDigaai numero Número Número do jornal ou revista. text 1 TRUE FALSE FALSE TRUE FALSE FALSE customDigaai datadePublicao Data de Publicação Entrar dia/mes/ano. dia/mes/ano text 2 TRUE FALSE FALSE TRUE FALSE FALSE customDigaai diff --git a/scripts/api/data/metadatablocks/customGSD.tsv b/scripts/api/data/metadatablocks/customGSD.tsv index 9a84a70cc7e..d15a4e89748 100644 --- a/scripts/api/data/metadatablocks/customGSD.tsv +++ b/scripts/api/data/metadatablocks/customGSD.tsv @@ -1,6 +1,6 @@ #metadataBlock name dataverseAlias displayName customGSD Graduate School of Design Metadata -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable showabovefold required parent metadatablock_id +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id gsdStudentName Student Name Full name of the student: Last Name, First Name (example: Smith, Jane). Use the name that the GSD Administrator has on file. LastName, FirstName text 0 TRUE FALSE TRUE FALSE FALSE FALSE customGSD gsdStudentProgram Student's Program of Study Student's program of study. text 1 TRUE TRUE TRUE TRUE FALSE FALSE customGSD gsdCourseName Course Name Name of the course. text 2 TRUE TRUE FALSE TRUE FALSE FALSE customGSD diff --git a/scripts/api/data/metadatablocks/customMRA.tsv b/scripts/api/data/metadatablocks/customMRA.tsv index 5f8c1d07466..ea915575c21 100644 --- a/scripts/api/data/metadatablocks/customMRA.tsv +++ b/scripts/api/data/metadatablocks/customMRA.tsv @@ -1,6 +1,6 @@ #metadataBlock name dataverseAlias displayName customMRA MRA Metadata -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable showabovefold required parent metadatablock_id +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id mraCollection Murray Research Archive Collection Browse the Murray Research Archive collection with the following terms. 
text 0 FALSE TRUE TRUE TRUE FALSE FALSE customMRA #controlledVocabulary DatasetField Value identifier displayOrder mraCollection Diversity samples: Race, Ethnicity, Sexual Orientation, Religion MRA0 0 diff --git a/scripts/api/data/metadatablocks/customPSI.tsv b/scripts/api/data/metadatablocks/customPSI.tsv index 0d41ea7bf0a..b5103df850b 100644 --- a/scripts/api/data/metadatablocks/customPSI.tsv +++ b/scripts/api/data/metadatablocks/customPSI.tsv @@ -1,6 +1,6 @@ #metadataBlock name dataverseAlias displayName customPSI PSI Metadata -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable showabovefold required parent metadatablock_id +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id psiBehavior Behavior Behavior text 0 TRUE TRUE TRUE TRUE FALSE FALSE customPSI psiDonor Donor Donor text 1 TRUE TRUE TRUE TRUE FALSE FALSE customPSI psiHealthArea Health Area Health Area text 2 TRUE TRUE TRUE TRUE FALSE FALSE customPSI diff --git a/scripts/api/data/metadatablocks/customPSRI.tsv b/scripts/api/data/metadatablocks/customPSRI.tsv index 64f2e667223..94936872adb 100644 --- a/scripts/api/data/metadatablocks/customPSRI.tsv +++ b/scripts/api/data/metadatablocks/customPSRI.tsv @@ -1,6 +1,6 @@ #metadataBlock name dataverseAlias displayName customPSRI Political Science Replication Initiative Metadata -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable showabovefold required parent metadatablock_id +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id PSRI1 Are the original data publicly available? Select from the list of options. text 0 FALSE TRUE FALSE FALSE FALSE FALSE customPSRI PSRI2 Is the original code available? Select from the list of options. text 1 FALSE TRUE FALSE FALSE FALSE FALSE customPSRI PSRI3 Where are the original data archived (name and url)? Answer if the data are publicly available. text 2 FALSE FALSE FALSE FALSE FALSE FALSE customPSRI diff --git a/scripts/api/data/metadatablocks/custom_hbgdki.tsv b/scripts/api/data/metadatablocks/custom_hbgdki.tsv index 577b5d90652..bbb098d7689 100644 --- a/scripts/api/data/metadatablocks/custom_hbgdki.tsv +++ b/scripts/api/data/metadatablocks/custom_hbgdki.tsv @@ -1,6 +1,6 @@ #metadataBlock name dataverseAlias displayName custom_hbgdki HBGDki HBGDki Custom Metadata -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable showabovefold required parent metadatablock_id +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id hbgdkiStudyName Name of Study Name of the study. Limit to 20 characters. text 0 TRUE FALSE FALSE FALSE TRUE FALSE custom_hbgdki hbgdkiStudyRegistry Study Registry Which study registry was used? none 1 FALSE FALSE TRUE FALSE TRUE FALSE custom_hbgdki hbgdkiStudyRegistryType ID Type Which study registry was used? 
text 2 TRUE TRUE FALSE FALSE TRUE FALSE hbgdkiStudyRegistry custom_hbgdki diff --git a/scripts/api/data/metadatablocks/geospatial.tsv b/scripts/api/data/metadatablocks/geospatial.tsv index a3c56130a6d..7464d51dc94 100644 --- a/scripts/api/data/metadatablocks/geospatial.tsv +++ b/scripts/api/data/metadatablocks/geospatial.tsv @@ -1,6 +1,6 @@ #metadataBlock name dataverseAlias displayName geospatial Geospatial Metadata -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable showabovefold required parent metadatablock_id +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id geographicCoverage Geographic Coverage Information on the geographic coverage of the data. Includes the total geographic scope of the data. none 0 FALSE FALSE TRUE FALSE FALSE FALSE geospatial country Country / Nation The country or nation that the Dataset is about. text 1 TRUE TRUE FALSE TRUE FALSE FALSE geographicCoverage geospatial state State / Province The state or province that the Dataset is about. Use GeoNames for correct spelling and avoid abbreviations. text 2 TRUE FALSE FALSE TRUE FALSE FALSE geographicCoverage geospatial diff --git a/scripts/api/data/metadatablocks/journals.tsv b/scripts/api/data/metadatablocks/journals.tsv index ab56655803f..097f0293ba8 100644 --- a/scripts/api/data/metadatablocks/journals.tsv +++ b/scripts/api/data/metadatablocks/journals.tsv @@ -1,6 +1,6 @@ #metadataBlock name dataverseAlias displayName journal Journal Metadata -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable showabovefold required parent metadatablock_id +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id journalVolumeIssue Journal Indicates the volume, issue and date of a journal, which this Dataset is associated with. none 0 FALSE FALSE TRUE FALSE FALSE FALSE journal journalVolume Volume The journal volume which this Dataset is associated with (e.g., Volume 4). text 1 TRUE FALSE FALSE TRUE FALSE FALSE journalVolumeIssue journal journalIssue Issue The journal issue number which this Dataset is associated with (e.g., Number 2, Autumn). 
text 2 TRUE FALSE FALSE TRUE FALSE FALSE journalVolumeIssue journal diff --git a/scripts/api/data/metadatablocks/social_science.tsv b/scripts/api/data/metadatablocks/social_science.tsv index 29467751e2e..b9fec245a1f 100644 --- a/scripts/api/data/metadatablocks/social_science.tsv +++ b/scripts/api/data/metadatablocks/social_science.tsv @@ -1,6 +1,6 @@ #metadataBlock name dataverseAlias displayName socialscience Social Science and Humanities Metadata -#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable showabovefold required parent metadatablock_id +#datasetField name title description watermark fieldType displayOrder displayFormat advancedSearchField allowControlledVocabulary allowmultiples facetable displayoncreate required parent metadatablock_id unitOfAnalysis Unit of Analysis Basic unit of analysis or observation that this Dataset describes, such as individuals, families/households, groups, institutions/organizations, administrative units, and more. For information about the DDI's controlled vocabulary for this element, please refer to the DDI web page at http://www.ddialliance.org/Specification/DDI-CV/. textbox 0 TRUE FALSE TRUE TRUE FALSE FALSE socialscience universe Universe Description of the population covered by the data in the file; the group of people or other elements that are the object of the study and to which the study results refer. Age, nationality, and residence commonly help to delineate a given universe, but any number of other factors may be used, such as age limits, sex, marital status, race, ethnic group, nationality, income, veteran status, criminal convictions, and more. The universe may consist of elements other than persons, such as housing units, court cases, deaths, countries, and so on. In general, it should be possible to tell from the description of the universe whether a given individual or element is a member of the population under study. Also known as the universe of interest, population of interest, and target population. textbox 1 TRUE FALSE TRUE TRUE FALSE FALSE socialscience timeMethod Time Method The time method or time dimension of the data collection, such as panel, cross-sectional, trend, time- series, or other. text 2 TRUE FALSE FALSE TRUE FALSE FALSE socialscience From cda1b541d75663092b4f6ac7f6ce1d4f9081799f Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 18 Apr 2016 01:23:20 -0400 Subject: [PATCH 04/37] First push into the Harvesting branch. This is a rudimentary framework for the harvesting client (incomplete!) 
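The centerpiece of this push is FastGetRecord (below), a client for the OAI-PMH GetRecord verb that stream-parses only the protocol envelope and spools the enclosed metadata record to a temp file unparsed. A minimal caller sketch, using only the constructor and accessors this patch introduces -- the base URL, identifier, and metadata prefix values are illustrative placeholders, since the harvest loop that would supply them is not part of this commit:

    import java.io.File;
    import edu.harvard.iq.dataverse.harvest.client.FastGetRecord;

    public class FastGetRecordExample {
        public static void main(String[] args) throws Exception {
            // Illustrative values; a real harvest would take these from the
            // harvesting config and a preceding ListIdentifiers response.
            String baseURL = "http://demo.dataverse.org/oai";
            String identifier = "doi:10.5072/FK2/EXAMPLE";
            String metadataPrefix = "oai_dc";

            FastGetRecord record = new FastGetRecord(baseURL, identifier, metadataPrefix);

            if (record.getErrorMessage() != null) {
                System.out.println("GetRecord failed: " + record.getErrorMessage());
            } else if (record.isDeleted()) {
                System.out.println("Record is marked as deleted at the source.");
            } else {
                // The raw, unparsed <metadata> payload, saved for the import step.
                File metadata = record.getMetadataFile();
                System.out.println("Metadata saved to " + metadata.getAbsolutePath());
            }
        }
    }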
---
 pom.xml                                      |  12 +-
 .../iq/dataverse/DataverseServiceBean.java   |  85 +++
 .../dataverse/HarvestingDataverseConfig.java | 127 ++++
 .../harvest/client/FastGetRecord.java        | 520 ++++++++++++++
 .../harvest/client/HarvestTimerInfo.java     |  97 +++
 .../harvest/client/HarvesterServiceBean.java | 671 ++++++++++++++++++
 .../timer/DataverseTimerServiceBean.java     | 226 ++++++
 7 files changed, 1737 insertions(+), 1 deletion(-)
 create mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java
 create mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestTimerInfo.java
 create mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java
 create mode 100644 src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java

diff --git a/pom.xml b/pom.xml
index e15f5a87529..2fbef369667 100644
--- a/pom.xml
+++ b/pom.xml
@@ -39,7 +39,7 @@ Local repository for hosting jars not available from network repositories. file://${project.basedir}/local_lib - + dataone.org http://dev-testing.dataone.org/maven
@@ -49,6 +49,7 @@ true +
@@ -368,6 +369,15 @@ log4j 1.2.17 + + + + + + org.dspace + oclc-harvester2 + 0.1.12 +
diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
index b5f5d78f716..ee08517fc96 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
@@ -30,6 +30,8 @@ import java.util.jar.Manifest;
 import javax.ejb.EJB;
 import javax.ejb.Stateless;
+import javax.ejb.TransactionAttribute;
+import javax.ejb.TransactionAttributeType;
 import javax.inject.Inject;
 import javax.inject.Named;
 import javax.persistence.EntityManager;
@@ -450,6 +452,10 @@ public Map getAllHarvestedDataverseDescriptions(){
         return ret;
     }
+
+    public List getAllHarvestedDataverses() {
+        return em.createQuery("SELECT object(d) FROM Dataverse d, HarvestingDataverseConfig c WHERE c.dataverse.id IS NOT null AND c.dataverse.id=d.id order by d.id").getResultList(); // assumes the config entity maps its owning dataverse as a "dataverse" field
+    }

 public void populateDvSearchCard(SolrSearchResult solrSearchResult) {
@@ -499,4 +505,83 @@ public void populateDvSearchCard(SolrSearchResult solrSearchResult) {
             }
         }
     }
+
+    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+    public void setHarvestResult(Long hdId, String result) {
+        Dataverse hd = em.find(Dataverse.class, hdId);
+        em.refresh(hd);
+        if (hd.isHarvested()) {
+            hd.getHarvestingDataverseConfig().setHarvestResult(result);
+        }
+    }
+
+    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+    public void resetHarvestingStatus(Long hdId) {
+        Dataverse hd = em.find(Dataverse.class, hdId);
+        em.refresh(hd);
+        if (hd.isHarvested()) {
+            hd.getHarvestingDataverseConfig().setHarvestingNow(false);
+        }
+
+    }
+
+    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+    public void setHarvestInProgress(Long hdId, boolean inProgress) {
+        Dataverse hd = em.find(Dataverse.class, hdId);
+        em.refresh(hd);
+        if (hd.isHarvested()) {
+            hd.getHarvestingDataverseConfig().setHarvestingNow(inProgress);
+        }
+    }
+
+    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+    public void setLastHarvestTime(Long hdId, Date lastHarvestTime) {
+        Dataverse hd = em.find(Dataverse.class, hdId);
+        em.refresh(hd);
+        if (hd.isHarvested()) {
+            hd.getHarvestingDataverseConfig().setLastHarvestTime(lastHarvestTime);
+        }
+    }
+
+    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+    public void setHarvestSuccess(Long hdId,
Date currentTime, int harvestedCount, int failedCount) { + Dataverse hd = em.find(Dataverse.class, hdId); + em.refresh(hd); + if (hd.isHarvested()) { + /* TODO: + hd.getHarvestingDataverseConfig().setLastSuccessfulHarvestTime(currentTime); + hd.getHarvestingDataverseConfig().setHarvestedStudyCount(new Long(harvestedCount)); + hd.getHarvestingDataverseConfig().setFailedStudyCount(new Long(failedCount)); + */ + hd.getHarvestingDataverseConfig().setHarvestResult(edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.HARVEST_RESULT_SUCCESS); + } + } + + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public void setHarvestSuccessNotEmpty(Long hdId, Date currentTime, int harvestedCount, int failedCount) { + Dataverse hd = em.find(Dataverse.class, hdId); + em.refresh(hd); + if (hd.isHarvested()) { + /* TODO: + hd.getHarvestingDataverseConfig().setLastSuccessfulNonZeroHarvestTime(currentTime); + hd.getHarvestingDataverseConfig().setHarvestedStudyCountNonZero(new Long(harvestedCount)); + hd.getHarvestingDataverseConfig().setFailedStudyCountNonZero(new Long(failedCount)); + */ + } + } + + @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) + public void setHarvestFailure(Long hdId, int harvestedStudyCount, int failedCount) { + Dataverse hd = em.find(Dataverse.class, hdId); + em.refresh(hd); + if (hd.isHarvested()) { + /* TODO: + hd.getHarvestingDataverseConfig().setHarvestedStudyCount(new Long(harvestedStudyCount)); + hd.getHarvestingDataverseConfig().setFailedStudyCount(new Long(failedCount)); + */ + hd.getHarvestingDataverseConfig().setHarvestResult(edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.HARVEST_RESULT_FAILED); + } + + } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java index 28df6e19e65..6ded994902d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java @@ -6,6 +6,10 @@ package edu.harvard.iq.dataverse; import java.io.Serializable; +import java.text.SimpleDateFormat; +import java.util.Calendar; +import java.util.Date; +import java.util.GregorianCalendar; import javax.persistence.CascadeType; import javax.persistence.Column; import javax.persistence.Entity; @@ -16,6 +20,8 @@ import javax.persistence.JoinColumn; import javax.persistence.OneToOne; import javax.persistence.Table; +import javax.persistence.Temporal; +import javax.persistence.TemporalType; /** * @@ -56,6 +62,9 @@ public void setId(Long id) { public static final String REMOTE_ARCHIVE_URL_LEVEL_DATASET="dataset"; public static final String REMOTE_ARCHIVE_URL_LEVEL_FILE="file"; + public static final String SCHEDULE_PERIOD_DAILY="daily"; + public static final String SCHEDULE_PERIOD_WEEKLY="weekly"; + public HarvestingDataverseConfig() { this.harvestType = HARVEST_TYPE_OAI; // default harvestType this.harvestStyle = HARVEST_STYLE_DATAVERSE; // default harvestStyle @@ -84,6 +93,10 @@ public void setHarvestType(String harvestType) { this.harvestType = harvestType; } + public boolean isOai() { + return HARVEST_TYPE_OAI.equals(harvestType); + } + String harvestStyle; public String getHarvestStyle() { @@ -134,10 +147,124 @@ public String getHarvestingSet() { public void setHarvestingSet(String harvestingSet) { this.harvestingSet = harvestingSet; } + + private String metadataPrefix; + + public String getMetadataPrefix() { + return metadataPrefix; + } + + public void 
setMetadataPrefix(String metadataPrefix) {
+        this.metadataPrefix = metadataPrefix;
+    }
+
+    private String harvestResult;
+
+    public String getHarvestResult() {
+        return harvestResult;
+    }
+
+    public void setHarvestResult(String harvestResult) {
+        this.harvestResult = harvestResult;
+    }
+
+    @Temporal(value = TemporalType.TIMESTAMP)
+    private Date lastHarvestTime;
+
+    public Date getLastHarvestTime() {
+        return lastHarvestTime;
+    }
+
+    public void setLastHarvestTime(Date lastHarvestTime) {
+        this.lastHarvestTime = lastHarvestTime;
+    }
+
+    @Temporal(value = TemporalType.TIMESTAMP)
+    private Date lastSuccessfulHarvestTime;
+
+    public Date getLastSuccessfulHarvestTime() {
+        return lastSuccessfulHarvestTime;
+    }
+
+    public void setLastSuccessfulHarvestTime(Date lastSuccessfulHarvestTime) {
+        this.lastSuccessfulHarvestTime = lastSuccessfulHarvestTime;
+    }
+
+    private Long harvestedDatasetCount;
+    private Long failedDatasetCount;
+    @Temporal(value = TemporalType.TIMESTAMP)
+    private Date lastSuccessfulNonEmptyHarvestTime;
+    private Long lastNonEmptyHarvestedDatasetCount;
+    private Long lastNonEmptyFailedDatasetCount;
+
+    private boolean scheduled;
+
+    public boolean isScheduled() {
+        return this.scheduled;
+    }
+
+    public void setScheduled(boolean scheduled) {
+        this.scheduled = scheduled;
+    }
+
+    private String schedulePeriod;
+
+    public String getSchedulePeriod() {
+        return schedulePeriod;
+    }
+
+    public void setSchedulePeriod(String schedulePeriod) {
+        this.schedulePeriod = schedulePeriod;
+    }
+
+    private Integer scheduleHourOfDay;
+
+    public Integer getScheduleHourOfDay() {
+        return scheduleHourOfDay;
+    }
+
+    public void setScheduleHourOfDay(Integer scheduleHourOfDay) {
+        this.scheduleHourOfDay = scheduleHourOfDay;
+    }
+
+    private Integer scheduleDayOfWeek;
+
+    public Integer getScheduleDayOfWeek() {
+        return scheduleDayOfWeek;
+    }
+    public void setScheduleDayOfWeek(Integer scheduleDayOfWeek) {
+        this.scheduleDayOfWeek = scheduleDayOfWeek;
+    }
+    public String getScheduleDescription() {
+        Date date = new Date();
+        Calendar cal = new GregorianCalendar();
+        cal.setTime(date);
+        SimpleDateFormat weeklyFormat = new SimpleDateFormat(" E h a ");
+        SimpleDateFormat dailyFormat = new SimpleDateFormat(" h a ");
+        String desc = "Not Scheduled";
+        if (schedulePeriod != null && !schedulePeriod.isEmpty()) {
+            cal.set(Calendar.HOUR_OF_DAY, scheduleHourOfDay);
+            if (schedulePeriod.equals(SCHEDULE_PERIOD_WEEKLY)) {
+                cal.set(Calendar.DAY_OF_WEEK, scheduleDayOfWeek);
+                desc = "Weekly, " + weeklyFormat.format(cal.getTime());
+            } else {
+                desc = "Daily, " + dailyFormat.format(cal.getTime());
+            }
+        }
+        return desc;
+    }
+    private boolean harvestingNow;
+    public boolean isHarvestingNow() {
+        return this.harvestingNow;
+    }
+
+    public void setHarvestingNow(boolean harvestingNow) {
+        this.harvestingNow = harvestingNow;
+    }

 @Override
 public int hashCode() {
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java
new file mode 100644
index 00000000000..f87e182ecf8
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java
@@ -0,0 +1,520 @@
+/*
+   Copyright (C) 2005-2012, by the President and Fellows of Harvard College.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+   Dataverse Network - A web application to share, preserve and analyze research data.
+   Developed at the Institute for Quantitative Social Science, Harvard University.
+   Version 3.0.
+*/
+package edu.harvard.iq.dataverse.harvest.client;
+
+import java.io.IOException;
+import java.io.FileNotFoundException;
+
+import java.io.InputStream;
+import java.io.StringReader;
+import java.io.InputStreamReader;
+import java.io.BufferedReader;
+import java.io.File;
+
+import java.io.FileOutputStream;
+
+import java.io.PrintWriter;
+import java.net.HttpURLConnection;
+import java.net.URL;
+
+import java.util.zip.GZIPInputStream;
+import java.util.zip.InflaterInputStream;
+import java.util.zip.ZipInputStream;
+
+
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.TransformerException;
+import org.xml.sax.SAXException;
+
+//import org.xml.sax.InputSource;
+
+import javax.xml.stream.XMLStreamConstants;
+import javax.xml.stream.XMLStreamException;
+import javax.xml.stream.XMLStreamReader;
+import javax.xml.stream.XMLInputFactory;
+
+/*
+ * This is an optimized implementation of the OAI-PMH GetRecord method.
+ * Some code is borrowed from the OCLC implementation.
+ * It handles the retrieval of the record in a drastically different manner:
+ * It parses and validates the top, "administrative" portion of the record using
+ * an event-driven parser. Once it reaches the "payload", the actual metadata
+ * record enclosed in the <metadata> ... </metadata> tags, it just reads it line by
+ * line without parsing and saves it in a temp file. (The record will be parsed
+ * and validated in the next step, when we attempt to import it).
+ * On a very large record, for example, a DDI of a Dataset with a large number
+ * of associated data variables, even event-driven XML parsing can end up
+ * being rather expensive.
+ * This optimized version was originally written for DVN 3.*.
+ * Added in Dataverse 4: custom protocol extension for sending the metadata
+ * record as a pre-declared number of bytes.
+ * @author Leonid Andreev
+ *
+*/
+
+public class FastGetRecord {
+
+    /**
+     * Client-side GetRecord verb constructor
+     *
+     * @param baseURL the baseURL of the server to be queried
+     * @exception MalformedURLException the baseURL is bad
+     * @exception SAXException the xml response is bad
+     * @exception IOException an I/O error occurred
+     */
+
+    public FastGetRecord(String baseURL, String identifier, String metadataPrefix)
+            throws IOException, ParserConfigurationException, SAXException,
+            TransformerException {
+        harvestRecord(baseURL, identifier, metadataPrefix);
+
+    }
+
+    private String errorMessage = null;
+    private File savedMetadataFile = null;
+    private XMLInputFactory xmlInputFactory = null;
+    private boolean recordDeleted = false;
+
+    // TODO: logging
+
+    public String getErrorMessage() {
+        return errorMessage;
+    }
+
+    public File getMetadataFile() {
+        return savedMetadataFile;
+    }
+
+    public boolean isDeleted() {
+        return this.recordDeleted;
+    }
+
+
+    public void harvestRecord(String baseURL, String identifier, String metadataPrefix) throws IOException,
+            ParserConfigurationException, SAXException, TransformerException {
+
+        xmlInputFactory = javax.xml.stream.XMLInputFactory.newInstance();
+
+        String requestURL = getRequestURL(baseURL, identifier, metadataPrefix);
+
+        InputStream in = null;
+        URL url = new URL(requestURL);
+        HttpURLConnection con = null;
+        int responseCode = 0;
+
+        con = (HttpURLConnection) url.openConnection();
+        con.setRequestProperty("User-Agent", "OAIHarvester/2.0");
+        con.setRequestProperty("Accept-Encoding",
+                "compress, gzip, identity");
+        try {
+            responseCode = con.getResponseCode();
+            //logger.debug("responseCode=" + responseCode);
+        } catch (FileNotFoundException e) {
+            //logger.info(requestURL, e);
+            responseCode = HttpURLConnection.HTTP_UNAVAILABLE;
+        }
+
+        // TODO: -- L.A.
+        //
+        // support for cookies;
+        // support for limited retry attempts -- ?
+        // implement reading of the stream as filterinputstream -- ?
+        // -- that could make it a little faster still. -- L.A.
+
+
+
+        if (responseCode == 200) {
+
+            String contentEncoding = con.getHeaderField("Content-Encoding");
+            //logger.debug("contentEncoding=" + contentEncoding);
+
+            // support for the standard compress/gzip/deflate compression
+            // schemes:
+
+            if ("compress".equals(contentEncoding)) {
+                ZipInputStream zis = new ZipInputStream(con.getInputStream());
+                zis.getNextEntry();
+                in = zis;
+            } else if ("gzip".equals(contentEncoding)) {
+                in = new GZIPInputStream(con.getInputStream());
+            } else if ("deflate".equals(contentEncoding)) {
+                in = new InflaterInputStream(con.getInputStream());
+            } else {
+                in = con.getInputStream();
+            }
+
+            // We are going to read the OAI header and SAX-parse it for the
+            // error messages and other protocol information;
+            // The metadata section we're going to simply save in a temporary
+            // file, unparsed.
+ + BufferedReader rd = new BufferedReader(new InputStreamReader(in)); + + String line = null; + String oaiResponseHeader = ""; + boolean metadataFlag = false; + boolean metadataWritten = false; + boolean schemaChecked = false; + + savedMetadataFile = File.createTempFile("meta", ".tmp"); + FileOutputStream tempFileStream = new FileOutputStream(savedMetadataFile); + PrintWriter metadataOut = new PrintWriter (tempFileStream, true); + + metadataOut.println(""); + + int mopen = 0; + int mclose = 0; + + while ( ( line = rd.readLine () ) != null) { + if (!metadataFlag) { + if (line.matches(".*.*")) { + String lineCopy = line; + + int i = line.indexOf(""); + line = line.substring(i+10); + + oaiResponseHeader = oaiResponseHeader.concat(lineCopy.replaceAll(".*", "")); + + metadataFlag = true; + } + } + + if (metadataFlag) { + if (!metadataWritten) { + // Inside an OAI-PMH GetRecord response, the metadata + // record returned is enclosed in ... + // tags, after the OAI service sections that provide the + // date, identifier and other protocol-level information. + // However, it is possible for the metadata record itself + // to have tags of its own. So we have no + // choice but to count the opening and closing tags in + // order to recognize the one terminating the metadata + // section. + // This code isn't pretty, but on seriously large records + // the savings from not fully parsing the XML are + // significant. + // -- L.A. + + if (line.matches(" -1) { + if (!line.substring(i).matches("^]*/")) { + // don't count if it's a closed, empty tag: + // + mopen++; + } + i+=10; + } + } + if (line.matches(".*.*")) { + int i = 0; + while ((i = line.indexOf("", i)) > -1) { + i+=11; + mclose++; + } + + if ( mclose > mopen ) { + line = line.substring(0, line.lastIndexOf("")); + metadataWritten = true; + } + } + + if (!schemaChecked) { + // if the top-level XML element lacks the schema definition, + // insert the generic xmlns and xmlns:xsi attributes; these + // may be needed by the transform stylesheets. + // this mimicks the behaviour of the OCLC GetRecord + // client implementation. + // -L.A. + + int offset = 0; + + // However, there may be one or more XML comments before + // the first "real" XML element (of the form + // ). So we need to skip these! + + while ( (line.indexOf('<', offset) > -1) + && + "': + + while (line != null + && + ((offset = line.indexOf("-->",offset)) < 0)) { + line = line.replaceAll("[\n\r]", " "); + offset = line.length(); + line = line.concat(rd.readLine()); + } + + offset += 3; + } + + // if we have skipped some comments, is there another + // XML element left in the buffered line? + int firstElementStart = -1; + + if ((firstElementStart = line.indexOf('<', offset)) > -1 ) { + // OK, looks like there is. + // is it terminated? + // if not, let's read the stream until + // we find the closing '>': + + int firstElementEnd = -1; + offset = firstElementStart; + + while (line != null + && + ((firstElementEnd = line.indexOf('>',offset)) < 0)) { + + line = line.replaceAll("[\n\r]", ""); + offset = line.length(); + line = line.concat(rd.readLine()); + } + + if (firstElementEnd < 0) { + // this should not happen! + // we've reached the end of the XML stream + // without encountering a single valid XML tag -- ?? 
+ + this.errorMessage = "Malformed GetRecord response; reached the end of the stream but couldn't find a single valid XML element in the metadata section."; + } else { + + // OK, we now have a line that contains a complete, + // terminated (possibly multi-line) first XML element + // that starts at [offset]. + + int i = firstElementStart; + + if (!line.substring(i).matches("^<[^>]*xmlns.*")) { + String head = line.substring(0, i); + String tail = line.substring(i); + tail = tail.replaceFirst(">", " xmlns=\"http://www.openarchives.org/OAI/2.0/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">"); + line = head + tail; + } + + schemaChecked = true; + } + } else { + // there was no "real" XML elements, only comments. + // We'll perform this schema check in the next + // iteration. + } + } + + metadataOut.println(line); + } + } else { + oaiResponseHeader = oaiResponseHeader.concat(line); + } + } + + // parse the OAI Record header: + + XMLStreamReader xmlr = null; + + try { + StringReader reader = new StringReader(oaiResponseHeader); + xmlr = xmlInputFactory.createXMLStreamReader(reader); + processOAIheader(xmlr); + + } catch (XMLStreamException ex) { + //Logger.getLogger("global").log(Level.SEVERE, null, ex); + if (this.errorMessage == null) { + this.errorMessage = "Malformed GetRecord response: " + oaiResponseHeader; + } + + // delete the temp metadata file; we won't need it: + if (savedMetadataFile != null) { + //savedMetadataFile.delete(); + } + + } + + try { + if (xmlr != null) { + xmlr.close(); + } + } catch (Exception ed) { + // seems OK to ignore; + } + + + if (rd != null) { + rd.close(); + } + + if (metadataOut != null) { + metadataOut.close(); + } + + if (!(metadataWritten) && !(this.isDeleted())) { + this.errorMessage = "Failed to parse GetRecord response; "+oaiResponseHeader; + //savedMetadataFile.delete(); + } + + if (this.isDeleted()) { + //savedMetadataFile.delete(); + } + + + } else { + this.errorMessage = "GetRecord request failed. HTTP error code "+responseCode; + } + } + + /** + * Construct the query portion of the http request + * (borrowed from OCLC implementation) + * + * @return a String containing the query portion of the http request + */ + private static String getRequestURL(String baseURL, + String identifier, + String metadataPrefix) { + + StringBuffer requestURL = new StringBuffer(baseURL); + requestURL.append("?verb=GetRecord"); + requestURL.append("&identifier=").append(identifier); + requestURL.append("&metadataPrefix=").append(metadataPrefix); + + return requestURL.toString(); + } + + private void processOAIheader (XMLStreamReader xmlr) throws XMLStreamException { + + // is this really a GetRecord response? + xmlr.nextTag(); + xmlr.require(XMLStreamConstants.START_ELEMENT, null, "OAI-PMH"); + processOAIPMH(xmlr); + + } + + private void processOAIPMH (XMLStreamReader xmlr) throws XMLStreamException { + + for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { + if (event == XMLStreamConstants.START_ELEMENT) { + // TODO: + // process all the fields currently skipped -- ? -- L.A. 
+ if (xmlr.getLocalName().equals("responseDate")) {} + else if (xmlr.getLocalName().equals("request")) {} + else if (xmlr.getLocalName().equals("error")) { + String errorCode = xmlr.getAttributeValue(null, "code"); + String errorMessageText = getElementText(xmlr); + + if (errorCode != null) { + this.errorMessage = "GetRecord error code: "+errorCode+"; "; + } + + if (errorCode != null) { + this.errorMessage = this.errorMessage + "GetRecord error message: "+errorMessageText+"; "; + } + throw new XMLStreamException(this.errorMessage); + + } + else if (xmlr.getLocalName().equals("GetRecord")) processGetRecordSection(xmlr); + } else if (event == XMLStreamConstants.END_ELEMENT) { + if (xmlr.getLocalName().equals("OAI-PMH")) return; + } + } + } + + private void processGetRecordSection (XMLStreamReader xmlr) throws XMLStreamException { + for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { + if (event == XMLStreamConstants.START_ELEMENT) { + if (xmlr.getLocalName().equals("record")) {processRecord(xmlr);} + } else if (event == XMLStreamConstants.END_ELEMENT) { + if (xmlr.getLocalName().equals("GetRecord")) return; + } + } + + } + + private void processRecord (XMLStreamReader xmlr) throws XMLStreamException { + for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { + if (event == XMLStreamConstants.START_ELEMENT) { + if (xmlr.getLocalName().equals("header")) { + if ("deleted".equals( xmlr.getAttributeValue(null, "status"))) { + this.recordDeleted = true; + } + processHeader(xmlr); + } else if (xmlr.getLocalName().equals("metadata")) {/*do nothing;*/} + } else if (event == XMLStreamConstants.END_ELEMENT) { + if (xmlr.getLocalName().equals("record")) return; + } + } + } + + private void processHeader (XMLStreamReader xmlr) throws XMLStreamException { + for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { + if (event == XMLStreamConstants.START_ELEMENT) { + if (xmlr.getLocalName().equals("identifier")) {/*do nothing*/} + else if (xmlr.getLocalName().equals("datestamp")) {/*do nothing -- ?*/} + else if (xmlr.getLocalName().equals("setSpec")) {/*do nothing*/} + + + } else if (event == XMLStreamConstants.END_ELEMENT) { + if (xmlr.getLocalName().equals("header")) return; + } + } + } + + + // (from Gustavo's ddiServiceBean -- L.A.) 
+    //
+    /* We had to add this method because the reference getElementText implementation has a bug where it
+     * would append a null before the text, if there was an escaped apostrophe; it appears
+     * that the code finds a null ENTITY_REFERENCE in this case, which seems like a bug;
+     * the workaround for the moment is to comment out the handling of ENTITY_REFERENCE in this case
+     */
+    private String getElementText(XMLStreamReader xmlr) throws XMLStreamException {
+        if(xmlr.getEventType() != XMLStreamConstants.START_ELEMENT) {
+            throw new XMLStreamException("parser must be on START_ELEMENT to read next text", xmlr.getLocation());
+        }
+        int eventType = xmlr.next();
+        StringBuffer content = new StringBuffer();
+        while(eventType != XMLStreamConstants.END_ELEMENT ) {
+            if(eventType == XMLStreamConstants.CHARACTERS
+                    || eventType == XMLStreamConstants.CDATA
+                    || eventType == XMLStreamConstants.SPACE
+                    /* || eventType == XMLStreamConstants.ENTITY_REFERENCE*/) {
+                content.append(xmlr.getText());
+            } else if(eventType == XMLStreamConstants.PROCESSING_INSTRUCTION
+                    || eventType == XMLStreamConstants.COMMENT
+                    || eventType == XMLStreamConstants.ENTITY_REFERENCE) {
+                // skipping
+            } else if(eventType == XMLStreamConstants.END_DOCUMENT) {
+                throw new XMLStreamException("unexpected end of document when reading element text content");
+            } else if(eventType == XMLStreamConstants.START_ELEMENT) {
+                throw new XMLStreamException("element text content may not contain START_ELEMENT", xmlr.getLocation());
+            } else {
+                throw new XMLStreamException("Unexpected event type "+eventType, xmlr.getLocation());
+            }
+            eventType = xmlr.next();
+        }
+        return content.toString();
+    }
+
+
+}
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestTimerInfo.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestTimerInfo.java
new file mode 100644
index 00000000000..96599352cf2
--- /dev/null
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestTimerInfo.java
@@ -0,0 +1,97 @@
+/*
+   Copyright (C) 2005-2012, by the President and Fellows of Harvard College.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+   Dataverse Network - A web application to share, preserve and analyze research data.
+   Developed at the Institute for Quantitative Social Science, Harvard University.
+   Version 3.0.
+*/
+package edu.harvard.iq.dataverse.harvest.client;
+
+import java.io.Serializable;
+
+/**
+ * This class is used when creating an EJB Timer for scheduling Harvesting.
+ * We use this class rather than the HarvestingDataverse entity because
+ * the class must be Serializable, and there is too much info associated with the HarvestingDataverse
+ * in order to realistically serialize it. (We can't make related mapped entities transient.)
+ * + * Based on the DVN 3 implementation, + * original + * @author Ellen Kraffmiller + * incorporated into Dataverse 4 by + * @author Leonid Andreev + */ +public class HarvestTimerInfo implements Serializable { + private Long harvestingDataverseId; + private String name; + private String schedulePeriod; + private Integer scheduleHourOfDay; + + public HarvestTimerInfo() { + + } + + + public HarvestTimerInfo(Long harvestingDataverseId, String name, String schedulePeriod, Integer scheduleHourOfDay, Integer scheduleDayOfWeek) { + this.harvestingDataverseId=harvestingDataverseId; + this.name=name; + this.schedulePeriod=schedulePeriod; + this.scheduleDayOfWeek=scheduleDayOfWeek; + this.scheduleHourOfDay=scheduleHourOfDay; + } + + + public Long getHarvestingDataverseId() { + return harvestingDataverseId; + } + + public void setHarvestingDataverseId(Long harvestingDataverseId) { + this.harvestingDataverseId = harvestingDataverseId; + } + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + public String getSchedulePeriod() { + return schedulePeriod; + } + + public void setSchedulePeriod(String schedulePeriod) { + this.schedulePeriod = schedulePeriod; + } + + public Integer getScheduleHourOfDay() { + return scheduleHourOfDay; + } + + public void setScheduleHourOfDay(Integer scheduleHourOfDay) { + this.scheduleHourOfDay = scheduleHourOfDay; + } + + public Integer getScheduleDayOfWeek() { + return scheduleDayOfWeek; + } + + public void setScheduleDayOfWeek(Integer scheduleDayOfWeek) { + this.scheduleDayOfWeek = scheduleDayOfWeek; + } + private Integer scheduleDayOfWeek; + + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java new file mode 100644 index 00000000000..5f7cfb98e4a --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java @@ -0,0 +1,671 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. 
+ */ +package edu.harvard.iq.dataverse.harvest.client; + +import edu.harvard.iq.dataverse.Dataset; +import edu.harvard.iq.dataverse.DatasetServiceBean; +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.HarvestingDataverseConfig; +import edu.harvard.iq.dataverse.timer.DataverseTimerServiceBean; +import edu.harvard.iq.dataverse.util.FileUtil; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.io.Writer; +import java.net.MalformedURLException; +import java.net.URLEncoder; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Calendar; +import java.util.Date; +import java.util.Iterator; +import java.util.List; +import java.util.logging.FileHandler; +import java.util.logging.Level; +import java.util.logging.Logger; +import javax.annotation.Resource; +import javax.ejb.EJB; +import javax.ejb.EJBException; +import javax.ejb.Stateless; +import javax.ejb.Timeout; +import javax.ejb.Timer; +import javax.ejb.TransactionAttribute; +import javax.ejb.TransactionAttributeType; +import javax.faces.bean.ManagedBean; +import javax.inject.Named; +import javax.persistence.Query; +import javax.xml.bind.JAXBContext; +import javax.xml.bind.JAXBElement; +import javax.xml.bind.JAXBException; +import javax.xml.bind.Unmarshaller; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.transform.TransformerException; +import org.apache.commons.lang.mutable.MutableBoolean; +import org.w3c.dom.Document; +import org.xml.sax.SAXException; + +/** + * + * @author Leonid Andreev + */ +@Stateless(name = "harvesterService") +@Named +@ManagedBean +public class HarvesterServiceBean { + @EJB + DataverseServiceBean dataverseService; + @EJB + DatasetServiceBean datasetService; + @Resource + javax.ejb.TimerService timerService; + @EJB + DataverseTimerServiceBean dataverseTimerService; + + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean"); + private static final String HARVEST_TIMER = "HarvestTimer"; + private static final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); + private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); + + public static final String HARVEST_RESULT_SUCCESS="success"; + public static final String HARVEST_RESULT_FAILED="failed"; + + + private JAXBContext jaxbContext; + private Unmarshaller unmarshaller; + + private long processedSizeThisBatch = 0; + private List harvestedDatasetIdsThisBatch = null; + public HarvesterServiceBean() { + + } + + /** + * Called to run an "On Demand" harvest. + * This method creates a timer that will go off immediately, + * which will start an immediate asynchronous harvest. 
+     * @param dataverse
+     */
+    public void doAsyncHarvest(Dataverse harvestedDataverse) {
+        HarvestingDataverseConfig harvestedDataverseConfig = harvestedDataverse.getHarvestingDataverseConfig();
+        
+        if (harvestedDataverseConfig == null) {
+            logger.info("ERROR: No Harvesting Configuration found for dataverse id="+harvestedDataverse.getId());
+            return;
+        }
+        
+        Calendar cal = Calendar.getInstance();
+        
+        timerService.createTimer(cal.getTime(), new HarvestTimerInfo(harvestedDataverse.getId(), harvestedDataverse.getName(), harvestedDataverseConfig.getSchedulePeriod(), harvestedDataverseConfig.getScheduleHourOfDay(), harvestedDataverseConfig.getScheduleDayOfWeek()));
+    }
+
+    public void createScheduledHarvestTimers() {
+        logger.log(Level.INFO, "HarvesterService: going to (re)create Scheduled harvest timers.");
+        dataverseTimerService.removeHarvestTimers();
+
+        List dataverses = dataverseService.getAllHarvestedDataverses();
+        for (Iterator it = dataverses.iterator(); it.hasNext();) {
+            Dataverse dataverse = (Dataverse) it.next();
+            HarvestingDataverseConfig harvestingConfig = dataverse.getHarvestingDataverseConfig();
+            if (harvestingConfig == null) {
+                logger.warning("ERROR: no harvesting config found for dataverse id="+dataverse.getId());
+            } else if (harvestingConfig.isScheduled()) {
+                createHarvestTimer(dataverse);
+            }
+        }
+    }
+
+    public void removeHarvestTimer(Dataverse dataverse) {
+        dataverseTimerService.removeHarvestTimer(dataverse);
+    }
+
+    public void updateHarvestTimer(Dataverse harvestedDataverse) {
+        removeHarvestTimer(harvestedDataverse);
+        createHarvestTimer(harvestedDataverse);
+    }
+
+    public List getHarvestTimers() {
+        ArrayList timers = new ArrayList();
+        // Collect the info objects for all the existing harvest timers
+        // (this method only lists them; nothing is modified or cancelled):
+        for (Iterator it = timerService.getTimers().iterator(); it.hasNext();) {
+            Timer timer = (Timer) it.next();
+            if (timer.getInfo() instanceof HarvestTimerInfo) {
+                HarvestTimerInfo info = (HarvestTimerInfo) timer.getInfo();
+                timers.add(info);
+            }
+        }
+        return timers;
+    }
+
+    private void createHarvestTimer(Dataverse harvestingDataverse) {
+        HarvestingDataverseConfig harvestingDataverseConfig = harvestingDataverse.getHarvestingDataverseConfig();
+
+        if (harvestingDataverseConfig == null) {
+            logger.info("ERROR: No Harvesting Configuration found for dataverse id="+harvestingDataverse.getId());
+            return;
+        }
+
+        if (harvestingDataverseConfig.isScheduled()) {
+            long intervalDuration = 0;
+            Calendar initExpiration = Calendar.getInstance();
+            initExpiration.set(Calendar.MINUTE, 0);
+            initExpiration.set(Calendar.SECOND, 0);
+            if (harvestingDataverseConfig.getSchedulePeriod().equals(harvestingDataverseConfig.SCHEDULE_PERIOD_DAILY)) {
+                intervalDuration = 1000 * 60 * 60 * 24;
+                initExpiration.set(Calendar.HOUR_OF_DAY, harvestingDataverseConfig.getScheduleHourOfDay());
+
+            } else if (harvestingDataverseConfig.getSchedulePeriod().equals(harvestingDataverseConfig.SCHEDULE_PERIOD_WEEKLY)) {
+                intervalDuration = 1000 * 60 * 60 * 24 * 7;
+                initExpiration.set(Calendar.HOUR_OF_DAY, harvestingDataverseConfig.getScheduleHourOfDay());
+                initExpiration.set(Calendar.DAY_OF_WEEK, harvestingDataverseConfig.getScheduleDayOfWeek());
+
+            } else {
+                logger.log(Level.WARNING, "Could not set timer for dataverse id, " + harvestingDataverse.getId() + ", unknown schedule period: " + harvestingDataverseConfig.getSchedulePeriod());
+                return;
+            }
+            Date initExpirationDate = initExpiration.getTime();
+            Date currTime = new Date();
+            if (initExpirationDate.before(currTime)) {
+                
initExpirationDate.setTime(initExpiration.getTimeInMillis() + intervalDuration); + } + logger.log(Level.INFO, "Setting timer for dataverse " + harvestingDataverse.getName() + ", initial expiration: " + initExpirationDate); + dataverseTimerService.createTimer(initExpirationDate, intervalDuration, new HarvestTimerInfo(harvestingDataverse.getId(), harvestingDataverse.getName(), harvestingDataverseConfig.getSchedulePeriod(), harvestingDataverseConfig.getScheduleHourOfDay(), harvestingDataverseConfig.getScheduleDayOfWeek())); + } + } + + /** + * This method is called whenever an EJB Timer goes off. + * Check to see if this is a Harvest Timer, and if it is + * Run the harvest for the given (scheduled) dataverse + * @param timer + */ + @Timeout + @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) + public void handleTimeout(javax.ejb.Timer timer) { + // We have to put all the code in a try/catch block because + // if an exception is thrown from this method, Glassfish will automatically + // call the method a second time. (The minimum number of re-tries for a Timer method is 1) + + if (timer.getInfo() instanceof HarvestTimerInfo) { + HarvestTimerInfo info = (HarvestTimerInfo) timer.getInfo(); + try { + // First, check if we are in read-only mode: + /* + if (...) { + logger.log(Level.ALL, "Dataverse is in read-only mode."); + return; + + } + */ + + // Proceeding with the scheduled harvest: + + logger.log(Level.INFO, "DO HARVESTING of dataverse " + info.getHarvestingDataverseId()); + doHarvesting(info.getHarvestingDataverseId()); + + } catch (Throwable e) { + dataverseService.setHarvestResult(info.getHarvestingDataverseId(), this.HARVEST_RESULT_FAILED); + /*mailService.sendHarvestErrorNotification(...getSystemEmail(), ...);*/ + logException(e, logger); + } + } + } + + /** + * Harvest an individual Dataverse + * @param dataverseId + */ + public void doHarvesting(Long dataverseId) throws IOException { + Dataverse harvestingDataverse = dataverseService.find(dataverseId); + + if (harvestingDataverse == null) { + throw new IOException("No such Dataverse: id="+dataverseId); + } + + HarvestingDataverseConfig harvestingDataverseConfig = harvestingDataverse.getHarvestingDataverseConfig(); + + if (harvestingDataverseConfig == null) { + throw new IOException("Could not find Harvesting Config for Dataverse id="+dataverseId); + } + + MutableBoolean harvestErrorOccurred = new MutableBoolean(false); + String logTimestamp = logFormatter.format(new Date()); + Logger hdLogger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean." + harvestingDataverse.getAlias() + logTimestamp); + String logFileName = /* TODO: !!!! FileUtil.getImportFileDir() +*/ File.separator + "harvest_" + harvestingDataverse.getAlias() + logTimestamp + ".log"; + FileHandler fileHandler = new FileHandler(logFileName); + hdLogger.addHandler(fileHandler); + List harvestedDatasetIds = null; + + this.processedSizeThisBatch = 0; + this.harvestedDatasetIdsThisBatch = new ArrayList(); + + List failedIdentifiers = new ArrayList(); + try { + boolean harvestingNow = harvestingDataverseConfig.isHarvestingNow(); + + if (harvestingNow) { + harvestErrorOccurred.setValue(true); + hdLogger.log(Level.SEVERE, "Cannot begin harvesting, Dataverse " + harvestingDataverse.getName() + " is currently being harvested."); + + } else { + dataverseService.resetHarvestingStatus(harvestingDataverse.getId()); + String until = null; // If we don't set until date, we will get all the changes since the last harvest. 
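+                // For reference: "from" and "until" are the standard OAI-PMH selective
+                // harvesting arguments. Passing only "from" asks the remote server for
+                // everything created or changed since that datestamp. A request would
+                // look roughly like this (the URL below is illustrative, not an actual
+                // server):
+                //   http://remote.archive.edu/oai?verb=ListIdentifiers&metadataPrefix=oai_dc&from=2016-04-08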
+ String from = null; + Date lastSuccessfulHarvestTime = harvestingDataverseConfig.getLastSuccessfulHarvestTime(); + if (lastSuccessfulHarvestTime != null) { + from = formatter.format(lastSuccessfulHarvestTime); + } + dataverseService.setHarvestInProgress(harvestingDataverse.getId(), true); + Date currentTime = new Date(); + dataverseService.setLastHarvestTime(harvestingDataverse.getId(), currentTime); + + hdLogger.log(Level.INFO, "BEGIN HARVEST..., oaiUrl=" + harvestingDataverseConfig.getArchiveUrl() + ",set=" + harvestingDataverseConfig.getHarvestingSet() + ", metadataPrefix=" + harvestingDataverseConfig.getMetadataPrefix() + ", from=" + from + ", until=" + until); + + if (harvestingDataverseConfig.isOai()) { + harvestedDatasetIds = harvestOAI(harvestingDataverse, hdLogger, from, until, harvestErrorOccurred, failedIdentifiers); + + } else { + throw new IOException("Unsupported harvest type"); + } + dataverseService.setHarvestSuccess(harvestingDataverse.getId(),currentTime, harvestedDatasetIds.size(), failedIdentifiers.size()); + hdLogger.log(Level.INFO, "COMPLETED HARVEST, server=" + harvestingDataverseConfig.getArchiveUrl() + ", metadataPrefix=" + harvestingDataverseConfig.getMetadataPrefix()); + + /* Last "non-empty" harvest: */ + if (harvestedDatasetIds.size() > 0) { + dataverseService.setHarvestSuccessNotEmpty(harvestingDataverse.getId(),currentTime, harvestedDatasetIds.size(), failedIdentifiers.size()); + hdLogger.log(Level.INFO, "COMPLETED HARVEST with results"); + } + + // now index all studies (need to modify for update) + /* (TODO: !!!) + if (this.processedSizeThisBatch > 0) { + hdLogger.log(Level.INFO, "POST HARVEST, reindexing the remaining studies."); + if (this.harvestedDatasetIdsThisBatch != null) { + hdLogger.log(Level.INFO, this.harvestedDatasetIdsThisBatch.size()+" studies in the batch"); + } + hdLogger.log(Level.INFO, this.processedSizeThisBatch + " bytes of content"); + indexService.updateIndexList(this.harvestedDatasetIdsThisBatch); + hdLogger.log(Level.INFO, "POST HARVEST, calls to index finished."); + } else { + hdLogger.log(Level.INFO, "(All harvested content already reindexed)"); + } + */ + } + //mailService.sendHarvestNotification(...getSystemEmail(), harvestingDataverse.getName(), logFileName, logTimestamp, harvestErrorOccurred.booleanValue(), harvestedDatasetIds.size(), failedIdentifiers); + } catch (Throwable e) { + harvestErrorOccurred.setValue(true); + String message = "Exception processing harvest, server= " + harvestingDataverseConfig.getArchiveUrl() + ",format=" + harvestingDataverseConfig.getMetadataPrefix() + " " + e.getClass().getName() + " " + e.getMessage(); + hdLogger.log(Level.SEVERE, message); + logException(e, hdLogger); + hdLogger.log(Level.INFO, "HARVEST NOT COMPLETED DUE TO UNEXPECTED ERROR."); + dataverseService.setHarvestFailure(harvestingDataverse.getId(), harvestedDatasetIds.size(), failedIdentifiers.size()); + + + } finally { + dataverseService.setHarvestInProgress(harvestingDataverse.getId(), false); + fileHandler.close(); + hdLogger.removeHandler(fileHandler); + } + } + + /** + * + * @param dataverse the dataverse to harvest into + * @param from get updated studies from this beginning date + * @param until get updated studies until this end date + * @param harvestErrorOccurred have we encountered any errors during harvest? 
+ * @param failedIdentifiers Study Identifiers for failed "GetRecord" requests + */ + private List harvestOAI(Dataverse dataverse, Logger hdLogger, String from, String until, MutableBoolean harvestErrorOccurred, List failedIdentifiers) + throws IOException, ParserConfigurationException,SAXException, TransformerException, JAXBException { + + List harvestedDatasetIds = new ArrayList(); + + /* + ResumptionTokenType resumptionToken = null; + + do { + //resumptionToken = harvesterService.harvestFromIdentifiers(hdLogger, resumptionToken, dataverse, from, until, harvestedDatasetIds, failedIdentifiers, harvestErrorOccurred + resumptionToken = harvestFromIdentifiers(hdLogger, resumptionToken, dataverse, from, until, harvestedDatasetIds, failedIdentifiers, harvestErrorOccurred); + } while (resumptionToken != null && !resumptionToken.equals("")); + + hdLogger.log(Level.INFO, "COMPLETED HARVEST, oaiUrl=" + dataverse.getServerUrl() + ",set=" + dataverse.getHarvestingSet() + ", metadataPrefix=" + dataverse.getHarvestFormatType().getMetadataPrefix() + ", from=" + from + ", until=" + until); + + */ + return harvestedDatasetIds; + + } + + /* + @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) + public ResumptionTokenType harvestFromIdentifiers(Logger hdLogger, ResumptionTokenType resumptionToken, HarvestingDataverse dataverse, String from, String until, List harvestedDatasetIds, List failedIdentifiers, MutableBoolean harvestErrorOccurred) + throws java.io.IOException, ParserConfigurationException, SAXException, TransformerException, JAXBException { + String encodedSet = dataverse.getHarvestingSet() == null ? null : URLEncoder.encode(dataverse.getHarvestingSet(), "UTF-8"); + ListIdentifiers listIdentifiers = null; + + if (resumptionToken == null) { + listIdentifiers = new ListIdentifiers(dataverse.getServerUrl(), + from, + until, + encodedSet, + URLEncoder.encode(dataverse.getHarvestFormatType().getMetadataPrefix(), "UTF-8")); + } else { + hdLogger.log(Level.INFO, "harvestFromIdentifiers(), resumptionToken=" + resumptionToken.getValue()); + listIdentifiers = new ListIdentifiers(dataverse.getServerUrl(), resumptionToken.getValue()); + } + + Document doc = listIdentifiers.getDocument(); + + // JAXBContext jc = JAXBContext.newInstance("edu.harvard.hmdc.vdcnet.jaxb.oai"); + // Unmarshaller unmarshaller = jc.createUnmarshaller(); + JAXBElement unmarshalObj = (JAXBElement) unmarshaller.unmarshal(doc); + OAIPMHtype oaiObj = (OAIPMHtype) unmarshalObj.getValue(); + + if (oaiObj.getError() != null && oaiObj.getError().size() > 0) { + if (oaiObj.getError().get(0).getCode().equals(OAIPMHerrorcodeType.NO_RECORDS_MATCH)) { + hdLogger.info("ListIdentifiers returned NO_RECORDS_MATCH - no studies found to be harvested."); + } else { + handleOAIError(hdLogger, oaiObj, "calling listIdentifiers, oaiServer= " + dataverse.getServerUrl() + ",from=" + from + ",until=" + until + ",encodedSet=" + encodedSet + ",format=" + dataverse.getHarvestFormatType().getMetadataPrefix()); + throw new EJBException("Received OAI Error response calling ListIdentifiers"); + } + } else { + ListIdentifiersType listIdentifiersType = oaiObj.getListIdentifiers(); + if (listIdentifiersType != null) { + resumptionToken = listIdentifiersType.getResumptionToken(); + for (Iterator it = listIdentifiersType.getHeader().iterator(); it.hasNext();) { + HeaderType header = (HeaderType) it.next(); + MutableBoolean getRecordErrorOccurred = new MutableBoolean(false); + Long studyId = getRecord(hdLogger, dataverse, header.getIdentifier(), 
dataverse.getHarvestFormatType().getMetadataPrefix(), getRecordErrorOccurred);
+                    if (studyId != null) {
+                        harvestedDatasetIds.add(studyId);
+                    }
+                    if (getRecordErrorOccurred.booleanValue()) {
+                        failedIdentifiers.add(header.getIdentifier());
+                    }
+
+                }
+
+            }
+        }
+        String logMsg = "Returning from harvestFromIdentifiers";
+
+        if (resumptionToken == null) {
+            logMsg += " resumptionToken is null";
+        } else if (!StringUtil.isEmpty(resumptionToken.getValue())) {
+            logMsg += " resumptionToken is " + resumptionToken.getValue();
+        } else {
+            // Some OAI servers return an empty resumptionToken element when all
+            // the identifiers have been sent, so we need to check for this, and
+            // treat it as if the resumptionToken is null.
+            logMsg += " resumptionToken is empty, setting return value to null.";
+            resumptionToken = null;
+        }
+        hdLogger.info(logMsg);
+        return resumptionToken;
+    }
+    */
+
+    /*
+    private void handleOAIError(Logger hdLogger, OAIPMHtype oaiObj, String message) {
+        for (Iterator it = oaiObj.getError().iterator(); it.hasNext();) {
+            OAIPMHerrorType error = (OAIPMHerrorType) it.next();
+            message += ", error code: " + error.getCode();
+            message += ", error value: " + error.getValue();
+            hdLogger.log(Level.SEVERE, message);
+
+        }
+    }
+    */
+
+    /*
+    @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
+    public Long getRecord(HarvestingDataverse dataverse, String identifier, String metadataPrefix) {
+        return getRecord(logger, dataverse, identifier, metadataPrefix, null);
+    }
+    */
+
+    @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
+    public Long getRecord(Logger hdLogger, Dataverse dataverse, String identifier, String metadataPrefix, MutableBoolean recordErrorOccurred) {
+        String errMessage = null;
+
+        HarvestingDataverseConfig harvestingConfig = dataverse.getHarvestingDataverseConfig();
+
+        if (harvestingConfig == null) {
+            errMessage = "Could not find Harvesting Config for Dataverse id="+dataverse.getId();
+            hdLogger.log(Level.SEVERE, errMessage);
+            return null;
+        }
+
+        Dataset harvestedDataset = null;
+        String oaiUrl = harvestingConfig.getHarvestingUrl();
+        try {
+            hdLogger.log(Level.INFO, "Calling GetRecord: oaiUrl =" + oaiUrl + "?verb=GetRecord&identifier=" + identifier + "&metadataPrefix=" + metadataPrefix);
+
+            FastGetRecord record = new FastGetRecord(oaiUrl, identifier, metadataPrefix);
+            errMessage = record.getErrorMessage();
+            //errMessage=null;
+
+            if (errMessage != null) {
+                hdLogger.log(Level.SEVERE, "Error calling GetRecord - " + errMessage);
+            } else if (record.isDeleted()) {
+                hdLogger.log(Level.INFO, "Received 'deleted' status from OAI Server.");
+                Dataset dataset = null; //TODO: !!! datasetService.getDatasetByHarvestInfo(dataverse, identifier);
+                if (dataset != null) {
+                    hdLogger.log(Level.INFO, "Deleting study " + dataset.getGlobalId());
+                    // TODO: !!! datasetService.deleteDataset(dataset.getId());
+                } else {
+                    hdLogger.log(Level.INFO, "No study found for this record, skipping delete. ");
+                }
+
+            } else {
+                hdLogger.log(Level.INFO, "Successfully retrieved GetRecord response.");
+
+
+                harvestedDataset = null; // TODO: !!! import
+                hdLogger.log(Level.INFO, "Harvest Successful for identifier " + identifier);
+
+                this.processedSizeThisBatch += record.getMetadataFile().length();
+                if ( this.harvestedDatasetIdsThisBatch == null ) {
+                    this.harvestedDatasetIdsThisBatch = new ArrayList();
+                }
+                // NOTE: harvestedDataset is still null at this point (the import call
+                // above is a TODO), so the getId() call below will throw an NPE if
+                // this branch is ever reached; a null check will be needed here once
+                // the import is actually wired in.
+                this.harvestedDatasetIdsThisBatch.add(harvestedDataset.getId());
+
+                // reindexing in batches? - this is from DVN 3;
+                // we may not need it anymore.
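+                // For context, the DVN 3 batching logic preserved below: rather than
+                // reindexing after every individual GetRecord call, the harvested
+                // dataset ids are accumulated in harvestedDatasetIdsThisBatch until
+                // roughly 10MB worth of metadata has been processed (the 10000000
+                // threshold below is bytes of metadata file content, not a record
+                // count), and then the whole batch is reindexed in a single call.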
+ if ( this.processedSizeThisBatch > 10000000 ) { + + hdLogger.log(Level.INFO, "REACHED CONTENT BATCH SIZE LIMIT; calling index ("+this.harvestedDatasetIdsThisBatch.size()+" studies in the batch)."); + //indexService.updateIndexList(this.harvestedDatasetIdsThisBatch); + hdLogger.log(Level.INFO, "REINDEX DONE."); + + + this.processedSizeThisBatch = 0; + this.harvestedDatasetIdsThisBatch = null; + } + } + } catch (Throwable e) { + errMessage = "Exception processing getRecord(), oaiUrl=" + oaiUrl + ",identifier=" + identifier + " " + e.getClass().getName() + " " + e.getMessage(); + hdLogger.log(Level.SEVERE, errMessage); + logException(e, hdLogger); + + } + + // If we got an Error from the OAI server or an exception happened during import, then + // set recordErrorOccurred to true (if recordErrorOccurred is being used) + // otherwise throw an exception (if recordErrorOccurred is not used, i.e null) + if (errMessage != null) { + if (recordErrorOccurred != null) { + recordErrorOccurred.setValue(true); + } else { + throw new EJBException(errMessage); + } + } + + return harvestedDataset != null ? harvestedDataset.getId() : null; + } + + + /* + public List getMetadataFormats(String oaiUrl) { + JAXBElement unmarshalObj; + try { + + Document doc = new ListMetadataFormats(oaiUrl).getDocument(); + JAXBContext jc = JAXBContext.newInstance("edu.harvard.hmdc.vdcnet.jaxb.oai"); + Unmarshaller unmarshaller = jc.createUnmarshaller(); + unmarshalObj = (JAXBElement) unmarshaller.unmarshal(doc); + } catch (TransformerException ex) { + throw new EJBException(ex); + } catch (ParserConfigurationException ex) { + throw new EJBException(ex); + } catch (JAXBException ex) { + throw new EJBException(ex); + } catch (SAXException ex) { + throw new EJBException(ex); + } catch (IOException ex) { + throw new EJBException(ex); + } + + OAIPMHtype OAIObj = (OAIPMHtype) unmarshalObj.getValue(); + if (OAIObj.getError()!=null && OAIObj.getError().size()>0) { + List errList = OAIObj.getError(); + String errMessage=""; + for (OAIPMHerrorType error : OAIObj.getError()){ + errMessage += error.getCode()+ " " +error.getValue(); + } + throw new EJBException(errMessage); + } + ListMetadataFormatsType listMetadataFormats = OAIObj.getListMetadataFormats(); + List formats = null; + if (listMetadataFormats != null) { + formats = new ArrayList(); + for (Iterator it = listMetadataFormats.getMetadataFormat().iterator(); it.hasNext();) { + // Object elem = it.next(); + MetadataFormatType elem = (MetadataFormatType) it.next(); + formats.add(elem.getMetadataPrefix()); + } + } + return formats; + } + */ + + /** + * + * SetDetailBean returned rather than the ListSetsType because we get strange errors when trying + * to refer to JAXB generated classes in both Web and EJB tiers. 
+     */
+    /*
+    public List getSets(String oaiUrl) {
+        JAXBElement unmarshalObj = null;
+
+        try {
+            ListSets listSets = new ListSets(oaiUrl);
+            int nodeListLength = listSets.getErrors().getLength();
+            if (nodeListLength==1) {
+                System.out.println("err Node: "+ listSets.getErrors().item(0));
+            }
+
+
+            Document doc = listSets.getDocument();
+            JAXBContext jc = JAXBContext.newInstance("edu.harvard.hmdc.vdcnet.jaxb.oai");
+            Unmarshaller unmarshaller = jc.createUnmarshaller();
+            unmarshalObj = (JAXBElement) unmarshaller.unmarshal(doc);
+        } catch (ParserConfigurationException ex) {
+            throw new EJBException(ex);
+        } catch (SAXException ex) {
+            throw new EJBException(ex);
+        } catch (TransformerException ex) {
+            throw new EJBException(ex);
+        } catch (IOException ex) {
+            throw new EJBException(ex);
+        } catch (JAXBException ex) {
+            throw new EJBException(ex);
+        }
+        List sets = null;
+        Object value = unmarshalObj.getValue();
+
+        Package valPackage = value.getClass().getPackage();
+        if (value instanceof edu.harvard.hmdc.vdcnet.jaxb.oai.OAIPMHtype) {
+            OAIPMHtype OAIObj = (OAIPMHtype) value;
+            if (OAIObj.getError()!=null && OAIObj.getError().size()>0 ) {
+                List errList = OAIObj.getError();
+                String errMessage="";
+                for (OAIPMHerrorType error : OAIObj.getError()){
+                    // NO_SET_HIERARCHY is not an error from the perspective of the DVN,
+                    // it just means that the OAI server doesn't support sets.
+                    if (!error.getCode().equals(OAIPMHerrorcodeType.NO_SET_HIERARCHY)) {
+                        errMessage += error.getCode()+ " " +error.getValue();
+                    }
+                }
+                if (!"".equals(errMessage)) {
+                    throw new EJBException(errMessage);
+                }
+
+            }
+
+            ListSetsType listSetsType = OAIObj.getListSets();
+            if (listSetsType != null) {
+                sets = new ArrayList();
+                for (Iterator it = listSetsType.getSet().iterator(); it.hasNext();) {
+                    SetType elem = (SetType) it.next();
+                    SetDetailBean setDetail = new SetDetailBean();
+                    setDetail.setName(elem.getSetName());
+                    setDetail.setSpec(elem.getSetSpec());
+                    sets.add(setDetail);
+                }
+            }
+        }
+        return sets;
+    }
+    */
+
+
+    private void logException(Throwable e, Logger logger) {
+
+        boolean cause = false;
+        String fullMessage = "";
+        do {
+            String message = e.getClass().getName() + " " + e.getMessage();
+            if (cause) {
+                message = "\nCaused By Exception....................
" + e.getClass().getName() + " " + e.getMessage(); + } + StackTraceElement[] ste = e.getStackTrace(); + message += "\nStackTrace: \n"; + for (int m = 0; m < ste.length; m++) { + message += ste[m].toString() + "\n"; + } + fullMessage += message; + cause = true; + } while ((e = e.getCause()) != null); + logger.severe(fullMessage); + } + + /* + Most likely not needed any more: + public List findAllHarvestFormatTypes() { + String queryStr = "SELECT f FROM HarvestFormatType f"; + Query query = em.createQuery(queryStr); + return query.getResultList(); + } + + public HarvestFormatType findHarvestFormatTypeByMetadataPrefix(String metadataPrefix) { + String queryStr = "SELECT f FROM HarvestFormatType f WHERE f.metadataPrefix = '" + metadataPrefix + "'"; + Query query = em.createQuery(queryStr); + List resultList = query.getResultList(); + HarvestFormatType hft = null; + if (resultList.size() > 1) { + throw new EJBException("More than one HarvestFormatType found with metadata Prefix= '" + metadataPrefix + "'"); + } + if (resultList.size() == 1) { + hft = (HarvestFormatType) resultList.get(0); + } + return hft; + } +*/ + + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java new file mode 100644 index 00000000000..6a4159d11a8 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java @@ -0,0 +1,226 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package edu.harvard.iq.dataverse.timer; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.HarvestingDataverseConfig; +import edu.harvard.iq.dataverse.harvest.client.HarvestTimerInfo; +import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; +import java.io.Serializable; +import java.net.InetAddress; +import java.net.UnknownHostException; +import java.util.Calendar; +import java.util.Date; +import java.util.Iterator; +import java.util.logging.Level; +import java.util.logging.Logger; +import javax.annotation.Resource; +import javax.ejb.EJB; +import javax.ejb.Stateless; +import javax.ejb.Timeout; +import javax.ejb.Timer; +import javax.ejb.TransactionAttribute; +import javax.ejb.TransactionAttributeType; +import javax.persistence.EntityManager; +import javax.persistence.PersistenceContext; + +/** + * + * @author roberttreacy + */ +@Stateless +public class DataverseTimerServiceBean implements Serializable { + @Resource + javax.ejb.TimerService timerService; + @PersistenceContext(unitName = "VDCNet-ejbPU") + private EntityManager em; + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.timer.DataverseTimerServiceBean"); + @EJB + HarvesterServiceBean harvesterService; + @EJB + DataverseServiceBean dataverseService; + + /*@EJB + StudyServiceLocal studyService;*/ + + + public void createTimer(Date initialExpiration, long intervalDuration, Serializable info) { + try { + logger.log(Level.INFO,"Creating timer on " + InetAddress.getLocalHost().getCanonicalHostName()); + } catch (UnknownHostException ex) { + Logger.getLogger(DataverseTimerServiceBean.class.getName()).log(Level.SEVERE, null, ex); + } + timerService.createTimer(initialExpiration, intervalDuration, info); + } + + + /** + * This method is called whenever an EJB Timer goes off. 
+     * Checks whether it is a Harvest Timer; if it is, runs the harvest
+     * for the given (scheduled) dataverse.
+     * @param timer
+     */
+    @Timeout
+    @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED)
+    public void handleTimeout(javax.ejb.Timer timer) {
+        // We have to put all the code in a try/catch block because
+        // if an exception is thrown from this method, Glassfish will automatically
+        // call the method a second time. (The minimum number of re-tries for a Timer method is 1)
+
+        try {
+            logger.log(Level.INFO,"Handling timeout on " + InetAddress.getLocalHost().getCanonicalHostName());
+        } catch (UnknownHostException ex) {
+            Logger.getLogger(DataverseTimerServiceBean.class.getName()).log(Level.SEVERE, null, ex);
+        }
+        if (timer.getInfo() instanceof HarvestTimerInfo) {
+            HarvestTimerInfo info = (HarvestTimerInfo) timer.getInfo();
+            try {
+
+                logger.log(Level.INFO, "DO HARVESTING of dataverse " + info.getHarvestingDataverseId());
+                harvesterService.doHarvesting(info.getHarvestingDataverseId());
+
+            } catch (Throwable e) {
+                dataverseService.setHarvestResult(info.getHarvestingDataverseId(), harvesterService.HARVEST_RESULT_FAILED);
+                //mailService.sendHarvestErrorNotification(dataverseService.find().getSystemEmail(), dataverseService.find().getName());
+                logException(e, logger);
+            }
+        }
+        /* Export timers: (not yet implemented!) -- L.A.
+        if (timer.getInfo() instanceof ExportTimerInfo) {
+            try {
+                ExportTimerInfo info = (ExportTimerInfo) timer.getInfo();
+                logger.info("handling timeout");
+                studyService.exportUpdatedStudies();
+            } catch (Throwable e) {
+                mailService.sendExportErrorNotification(vdcNetworkService.find().getSystemEmail(), vdcNetworkService.find().getName());
+                logException(e, logger);
+            }
+        }
+        */
+
+    }
+
+    public void removeHarvestTimers() {
+        // Remove all the harvest timers, if any exist:
+        //
+        // (the logging messages below are set to level INFO; it's ok,
+        // since this code is only called on startup of the application,
+        // and it may be useful to know what existing timers were encountered).
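+        // Note: timerService.getTimers() returns all the timers owned by this
+        // service bean, not just the harvesting ones - hence the instanceof
+        // check on HarvestTimerInfo below, so that unrelated timers (for
+        // example, the future export timers) are left alone.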
+ + logger.log(Level.INFO,"Removing existing harvest timers.."); + + int i = 1; + for (Iterator it = timerService.getTimers().iterator(); it.hasNext();) { + + Timer timer = (Timer) it.next(); + logger.log(Level.INFO, "HarvesterService: checking timer "+i); + + if (timer.getInfo() instanceof HarvestTimerInfo) { + logger.log(Level.INFO, "HarvesterService: timer "+i+" is a harvesting one; removing."); + timer.cancel(); + } + + i++; + } + } + + private void createHarvestTimer(Dataverse harvestedDataverse) { + HarvestingDataverseConfig harvestedDataverseConfig = harvestedDataverse.getHarvestingDataverseConfig(); + + if (harvestedDataverseConfig == null) { + logger.info("ERROR: No Harvesting Configuration found for dataverse id="+harvestedDataverse.getId()); + return; + } + + if (harvestedDataverseConfig.isScheduled()) { + long intervalDuration = 0; + Calendar initExpiration = Calendar.getInstance(); + initExpiration.set(Calendar.MINUTE, 0); + initExpiration.set(Calendar.SECOND, 0); + if (harvestedDataverseConfig.getSchedulePeriod().equals(HarvestingDataverseConfig.SCHEDULE_PERIOD_DAILY)) { + intervalDuration = 1000 * 60 * 60 * 24; + initExpiration.set(Calendar.HOUR_OF_DAY, harvestedDataverseConfig.getScheduleHourOfDay()); + + } else if (harvestedDataverseConfig.getSchedulePeriod().equals(harvestedDataverseConfig.SCHEDULE_PERIOD_WEEKLY)) { + intervalDuration = 1000 * 60 * 60 * 24 * 7; + initExpiration.set(Calendar.HOUR_OF_DAY, harvestedDataverseConfig.getScheduleHourOfDay()); + initExpiration.set(Calendar.DAY_OF_WEEK, harvestedDataverseConfig.getScheduleDayOfWeek()); + + } else { + logger.log(Level.WARNING, "Could not set timer for harvestedDataverse id, " + harvestedDataverse.getId() + ", unknown schedule period: " + harvestedDataverseConfig.getSchedulePeriod()); + return; + } + Date initExpirationDate = initExpiration.getTime(); + Date currTime = new Date(); + if (initExpirationDate.before(currTime)) { + initExpirationDate.setTime(initExpiration.getTimeInMillis() + intervalDuration); + } + logger.log(Level.INFO, "Setting timer for dataverse " + harvestedDataverse.getName() + ", initial expiration: " + initExpirationDate); + createTimer(initExpirationDate, intervalDuration, new HarvestTimerInfo(harvestedDataverse.getId(), harvestedDataverse.getName(), harvestedDataverseConfig.getSchedulePeriod(), harvestedDataverseConfig.getScheduleHourOfDay(), harvestedDataverseConfig.getScheduleDayOfWeek())); + } + } + + public void updateHarvestTimer(Dataverse harvestedDataverse) { + removeHarvestTimer(harvestedDataverse); + createHarvestTimer(harvestedDataverse); + } + + + public void removeHarvestTimer(Dataverse harvestedDataverse) { + // Clear dataverse timer, if one exists + try { + logger.log(Level.INFO,"Removing harvest timer on " + InetAddress.getLocalHost().getCanonicalHostName()); + } catch (UnknownHostException ex) { + Logger.getLogger(DataverseTimerServiceBean.class.getName()).log(Level.SEVERE, null, ex); + } + for (Iterator it = timerService.getTimers().iterator(); it.hasNext();) { + Timer timer = (Timer) it.next(); + if (timer.getInfo() instanceof HarvestTimerInfo) { + HarvestTimerInfo info = (HarvestTimerInfo) timer.getInfo(); + if (info.getHarvestingDataverseId().equals(harvestedDataverse.getId())) { + timer.cancel(); + } + } + } + } + + public void createExportTimer() { + /* Not yet implemented. The DVN 3 implementation can be used as a model */ + + } + + public void createExportTimer(Dataverse dataverse) { + /* Not yet implemented. 
The DVN 3 implementation can be used as a model */ + + } + + public void removeExportTimer() { + /* Not yet implemented. The DVN 3 implementation can be used as a model */ + } + + /* Utility methods: */ + private void logException(Throwable e, Logger logger) { + + boolean cause = false; + String fullMessage = ""; + do { + String message = e.getClass().getName() + " " + e.getMessage(); + if (cause) { + message = "\nCaused By Exception.................... " + e.getClass().getName() + " " + e.getMessage(); + } + StackTraceElement[] ste = e.getStackTrace(); + message += "\nStackTrace: \n"; + for (int m = 0; m < ste.length; m++) { + message += ste[m].toString() + "\n"; + } + fullMessage += message; + cause = true; + } while ((e = e.getCause()) != null); + logger.severe(fullMessage); + } + +} \ No newline at end of file From 91315ed4d3a5b33b66332cddbe345ae2147d0a0c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 18 Apr 2016 18:13:36 -0400 Subject: [PATCH 05/37] Initial framework for the Harvesting REST API. --- .../iq/dataverse/DataverseServiceBean.java | 31 +++-- .../dataverse/HarvestingDataverseConfig.java | 54 ++++++++- .../iq/dataverse/api/BatchServiceBean.java | 2 +- .../harvard/iq/dataverse/api/Harvesting.java | 65 +++++++++++ .../harvest/client/HarvesterServiceBean.java | 109 +++++------------- .../timer/DataverseTimerServiceBean.java | 2 +- 6 files changed, 163 insertions(+), 100 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index ee08517fc96..33ce58ff250 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -548,15 +548,19 @@ public void setHarvestSuccess(Long hdId, Date currentTime, int harvestedCount, i Dataverse hd = em.find(Dataverse.class, hdId); em.refresh(hd); if (hd.isHarvested()) { - /* TODO: - hd.getHarvestingDataverseConfig().setLastSuccessfulHarvestTime(currentTime); - hd.getHarvestingDataverseConfig().setHarvestedStudyCount(new Long(harvestedCount)); - hd.getHarvestingDataverseConfig().setFailedStudyCount(new Long(failedCount)); - */ + hd.getHarvestingDataverseConfig().setLastHarvestTime(currentTime); + hd.getHarvestingDataverseConfig().setLastSuccessfulHarvestTime(currentTime); hd.getHarvestingDataverseConfig().setHarvestResult(edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.HARVEST_RESULT_SUCCESS); + + if (harvestedCount > 0 || failedCount > 0) { + hd.getHarvestingDataverseConfig().setLastNonEmptyHarvestTime(currentTime); + hd.getHarvestingDataverseConfig().setHarvestedDatasetCount(new Long(harvestedCount)); + hd.getHarvestingDataverseConfig().setFailedDatasetCount(new Long(failedCount)); + /*TODO: record the number of deleted datasets! 
*/
+            }
+        }
+    }
-    
+/*    
     @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
     public void setHarvestSuccessNotEmpty(Long hdId, Date currentTime, int harvestedCount, int failedCount) {
         Dataverse hd = em.find(Dataverse.class, hdId);
@@ -566,22 +570,17 @@ public void setHarvestSuccessNotEmpty(Long hdId, Date currentTime, int harvested
             hd.getHarvestingDataverseConfig().setLastSuccessfulNonZeroHarvestTime(currentTime);
             hd.getHarvestingDataverseConfig().setHarvestedStudyCountNonZero(new Long(harvestedCount));
             hd.getHarvestingDataverseConfig().setFailedStudyCountNonZero(new Long(failedCount));
-            */
+            *
         }
-    }
+    }*/
 
     @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
-    public void setHarvestFailure(Long hdId, int harvestedStudyCount, int failedCount) {
+    public void setHarvestFailure(Long hdId, Date currentTime) {
         Dataverse hd = em.find(Dataverse.class, hdId);
         em.refresh(hd);
         if (hd.isHarvested()) {
-            /* TODO: 
-            hd.getHarvestingDataverseConfig().setHarvestedStudyCount(new Long(harvestedStudyCount));
-            hd.getHarvestingDataverseConfig().setFailedStudyCount(new Long(failedCount));
-            */
+            hd.getHarvestingDataverseConfig().setLastHarvestTime(currentTime);
             hd.getHarvestingDataverseConfig().setHarvestResult(edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.HARVEST_RESULT_FAILED);
         }
-
-    }
-
+    }    
 }
diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java
index 6ded994902d..9d2537375e1 100644
--- a/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java
+++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java
@@ -168,6 +168,10 @@ public void setHarvestResult(String harvestResult) {
         this.harvestResult = harvestResult;
     }
     
+    // "Last Harvest Time" is the last time we *attempted* to harvest
+    // from this remote resource.
+    // It wasn't necessarily a successful attempt!
+    
     @Temporal(value = TemporalType.TIMESTAMP)
     private Date lastHarvestTime; 
 
@@ -179,6 +183,12 @@ public void setLastHarvestTime(Date lastHarvestTime) {
         this.lastHarvestTime = lastHarvestTime;
     }
     
+    // This is the last "successful harvest" - i.e., the last time we
+    // tried to harvest, and got a response from the remote server.
+    // We may not have necessarily harvested any useful content though;
+    // the result may have been a "no content" or "no changes since the last harvest"
+    // response.
+    
     @Temporal(value = TemporalType.TIMESTAMP)
     private Date lastSuccessfulHarvestTime; 
 
@@ -190,12 +200,48 @@ public void setLastSuccessfulHarvestTime(Date lastSuccessfulHarvestTime) {
         this.lastSuccessfulHarvestTime = lastSuccessfulHarvestTime;
     }
     
+    // Finally, this is the time stamp from the last "non-empty" harvest.
+    // I.e.
the last time we ran a harvest that actually resulted in + // some Datasets created, updated or deleted: + + private Date lastNonEmptyHarvestTime; + @Temporal(value = TemporalType.TIMESTAMP) + public Date getLastNonEmptyHarvestTime() { + return lastNonEmptyHarvestTime; + } + + public void setLastNonEmptyHarvestTime(Date lastNonEmptyHarvestTime) { + this.lastNonEmptyHarvestTime = lastNonEmptyHarvestTime; + } + + // And these are the Dataset counts from that last "non-empty" harvest: private Long harvestedDatasetCount; private Long failedDatasetCount; - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastSuccessfulNonEmptyHarvestTime; - private Long lastNonEmptyHarvestedDatasetCount; - private Long lastNonEmptyFailedDatasetCount; + private Long deletedDatasetCount; + + public Long getHarvestedDatasetCount() { + return harvestedDatasetCount; + } + + public void setHarvestedDatasetCount(Long harvestedDatasetCount) { + this.harvestedDatasetCount = harvestedDatasetCount; + } + + public Long getFailedDatasetCount() { + return failedDatasetCount; + } + + public void setFailedDatasetCount(Long failedDatasetCount) { + this.failedDatasetCount = failedDatasetCount; + } + + public Long getDeletedDatasetCount() { + return deletedDatasetCount; + } + + public void setDeletedDatasetCount(Long deletedDatasetCount) { + this.deletedDatasetCount = deletedDatasetCount; + } private boolean scheduled; diff --git a/src/main/java/edu/harvard/iq/dataverse/api/BatchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/BatchServiceBean.java index 7bd0635cf7f..d41fed2652d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/BatchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/BatchServiceBean.java @@ -5,7 +5,6 @@ import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.api.imports.ImportException; import edu.harvard.iq.dataverse.api.imports.ImportUtil; -import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import java.io.File; import java.io.FileWriter; @@ -107,5 +106,6 @@ public JsonArrayBuilder handleDirectory(DataverseRequest dataverseRequest, File } return status; } + } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java b/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java new file mode 100644 index 00000000000..7d2b91af87f --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java @@ -0,0 +1,65 @@ +package edu.harvard.iq.dataverse.api; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.DataverseServiceBean; + +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; +import java.io.IOException; +import javax.ejb.EJB; +import javax.ejb.Stateless; +import javax.ws.rs.GET; +import javax.ws.rs.Path; +import javax.ws.rs.PathParam; +import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Response; + +@Stateless +@Path("harvest") +public class Harvesting extends AbstractApiBean { + + + @EJB + DataverseServiceBean dataverseService; + @EJB + HarvesterServiceBean harvesterService; + + + @GET + @Path("run/{dataverseAlias}") + public Response startHarvestingJob(@PathParam("dataverseAlias") String dataverseAlias, @QueryParam("key") String apiKey) throws IOException { + + try { + AuthenticatedUser authenticatedUser = null; + + try { + authenticatedUser = findAuthenticatedUserOrDie(); + } catch (WrappedResponse wr) { + return 
wr.getResponse(); + } + + if (authenticatedUser == null || !authenticatedUser.isSuperuser()) { + return errorResponse(Response.Status.FORBIDDEN, "Only the Dataverse Admin user can run harvesting jobs"); + } + + Dataverse dataverse = dataverseService.findByAlias(dataverseAlias); + + if (dataverse == null) { + return errorResponse(Response.Status.NOT_FOUND, "No such dataverse: "+dataverseAlias); + } + + if (!dataverse.isHarvested()) { + return errorResponse(Response.Status.BAD_REQUEST, "Not a HARVESTING dataverse: "+dataverseAlias); + } + + //DataverseRequest dataverseRequest = createDataverseRequest(authenticatedUser); + + harvesterService.doAsyncHarvest(dataverse); + + } catch (Exception e) { + return this.errorResponse(Response.Status.BAD_REQUEST, "Exception thrown when running a Harvest on dataverse \""+dataverseAlias+"\" via REST API; " + e.getMessage()); + } + return this.accepted(); + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java index 5f7cfb98e4a..deb33234916 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java @@ -29,6 +29,7 @@ import java.util.logging.Level; import java.util.logging.Logger; import javax.annotation.Resource; +import javax.ejb.Asynchronous; import javax.ejb.EJB; import javax.ejb.EJBException; import javax.ejb.Stateless; @@ -86,21 +87,15 @@ public HarvesterServiceBean() { /** * Called to run an "On Demand" harvest. - * This method creates a timer that will go off immediately, - * which will start an immediate asynchronous harvest. - * @param dataverse */ - public void doAsyncHarvest(Dataverse harvestedDataverse) { - HarvestingDataverseConfig harvestedDataverseConfig = harvestedDataverse.getHarvestingDataverseConfig(); + @Asynchronous + public void doAsyncHarvest(Dataverse harvestingDataverse) { - if (harvestedDataverseConfig == null) { - logger.info("ERROR: No Harvesting Configuration found for dataverse id="+harvestedDataverse.getId()); - return; + try { + doHarvest(harvestingDataverse.getId()); + } catch (Exception e) { + logger.info("Caught exception running an asynchronous harvest (dataverse \""+harvestingDataverse.getAlias()+"\")"); } - - Calendar cal = Calendar.getInstance(); - - timerService.createTimer(cal.getTime(), new HarvestTimerInfo(harvestedDataverse.getId(), harvestedDataverse.getName(), harvestedDataverseConfig.getSchedulePeriod(), harvestedDataverseConfig.getScheduleHourOfDay(), harvestedDataverseConfig.getScheduleDayOfWeek())); } public void createScheduledHarvestTimers() { @@ -176,50 +171,12 @@ private void createHarvestTimer(Dataverse harvestingDataverse) { dataverseTimerService.createTimer(initExpirationDate, intervalDuration, new HarvestTimerInfo(harvestingDataverse.getId(), harvestingDataverse.getName(), harvestingDataverseConfig.getSchedulePeriod(), harvestingDataverseConfig.getScheduleHourOfDay(), harvestingDataverseConfig.getScheduleDayOfWeek())); } } - - /** - * This method is called whenever an EJB Timer goes off. 
- * Check to see if this is a Harvest Timer, and if it is - * Run the harvest for the given (scheduled) dataverse - * @param timer - */ - @Timeout - @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) - public void handleTimeout(javax.ejb.Timer timer) { - // We have to put all the code in a try/catch block because - // if an exception is thrown from this method, Glassfish will automatically - // call the method a second time. (The minimum number of re-tries for a Timer method is 1) - - if (timer.getInfo() instanceof HarvestTimerInfo) { - HarvestTimerInfo info = (HarvestTimerInfo) timer.getInfo(); - try { - // First, check if we are in read-only mode: - /* - if (...) { - logger.log(Level.ALL, "Dataverse is in read-only mode."); - return; - - } - */ - - // Proceeding with the scheduled harvest: - - logger.log(Level.INFO, "DO HARVESTING of dataverse " + info.getHarvestingDataverseId()); - doHarvesting(info.getHarvestingDataverseId()); - - } catch (Throwable e) { - dataverseService.setHarvestResult(info.getHarvestingDataverseId(), this.HARVEST_RESULT_FAILED); - /*mailService.sendHarvestErrorNotification(...getSystemEmail(), ...);*/ - logException(e, logger); - } - } - } /** - * Harvest an individual Dataverse + * Run a harvest for an individual harvesting Dataverse * @param dataverseId */ - public void doHarvesting(Long dataverseId) throws IOException { + public void doHarvest(Long dataverseId) throws IOException { Dataverse harvestingDataverse = dataverseService.find(dataverseId); if (harvestingDataverse == null) { @@ -244,9 +201,11 @@ public void doHarvesting(Long dataverseId) throws IOException { this.harvestedDatasetIdsThisBatch = new ArrayList(); List failedIdentifiers = new ArrayList(); + Date harvestStartTime = new Date(); + try { boolean harvestingNow = harvestingDataverseConfig.isHarvestingNow(); - + if (harvestingNow) { harvestErrorOccurred.setValue(true); hdLogger.log(Level.SEVERE, "Cannot begin harvesting, Dataverse " + harvestingDataverse.getName() + " is currently being harvested."); @@ -259,29 +218,24 @@ public void doHarvesting(Long dataverseId) throws IOException { if (lastSuccessfulHarvestTime != null) { from = formatter.format(lastSuccessfulHarvestTime); } - dataverseService.setHarvestInProgress(harvestingDataverse.getId(), true); - Date currentTime = new Date(); - dataverseService.setLastHarvestTime(harvestingDataverse.getId(), currentTime); - - hdLogger.log(Level.INFO, "BEGIN HARVEST..., oaiUrl=" + harvestingDataverseConfig.getArchiveUrl() + ",set=" + harvestingDataverseConfig.getHarvestingSet() + ", metadataPrefix=" + harvestingDataverseConfig.getMetadataPrefix() + ", from=" + from + ", until=" + until); + dataverseService.setHarvestInProgress(harvestingDataverse.getId(), true); - if (harvestingDataverseConfig.isOai()) { - harvestedDatasetIds = harvestOAI(harvestingDataverse, hdLogger, from, until, harvestErrorOccurred, failedIdentifiers); + dataverseService.setLastHarvestTime(harvestingDataverse.getId(), harvestStartTime); - } else { - throw new IOException("Unsupported harvest type"); - } - dataverseService.setHarvestSuccess(harvestingDataverse.getId(),currentTime, harvestedDatasetIds.size(), failedIdentifiers.size()); - hdLogger.log(Level.INFO, "COMPLETED HARVEST, server=" + harvestingDataverseConfig.getArchiveUrl() + ", metadataPrefix=" + harvestingDataverseConfig.getMetadataPrefix()); + hdLogger.log(Level.INFO, "BEGIN HARVEST..., oaiUrl=" + harvestingDataverseConfig.getArchiveUrl() + ",set=" + harvestingDataverseConfig.getHarvestingSet() + ", 
metadataPrefix=" + harvestingDataverseConfig.getMetadataPrefix() + ", from=" + from + ", until=" + until); - /* Last "non-empty" harvest: */ - if (harvestedDatasetIds.size() > 0) { - dataverseService.setHarvestSuccessNotEmpty(harvestingDataverse.getId(),currentTime, harvestedDatasetIds.size(), failedIdentifiers.size()); - hdLogger.log(Level.INFO, "COMPLETED HARVEST with results"); - } - - // now index all studies (need to modify for update) - /* (TODO: !!!) + if (harvestingDataverseConfig.isOai()) { + harvestedDatasetIds = harvestOAI(harvestingDataverse, hdLogger, from, until, harvestErrorOccurred, failedIdentifiers); + + } else { + throw new IOException("Unsupported harvest type"); + } + dataverseService.setHarvestSuccess(harvestingDataverse.getId(), harvestStartTime, harvestedDatasetIds.size(), failedIdentifiers.size()); + hdLogger.log(Level.INFO, "COMPLETED HARVEST, server=" + harvestingDataverseConfig.getArchiveUrl() + ", metadataPrefix=" + harvestingDataverseConfig.getMetadataPrefix()); + hdLogger.log(Level.INFO, "Datasets created/updated: " + harvestedDatasetIds.size() + ", datasets deleted: [TODO:], datasets failed: " + failedIdentifiers.size()); + + // now index all studies (need to modify for update) + /* (TODO: !!!) if (this.processedSizeThisBatch > 0) { hdLogger.log(Level.INFO, "POST HARVEST, reindexing the remaining studies."); if (this.harvestedDatasetIdsThisBatch != null) { @@ -293,18 +247,17 @@ public void doHarvesting(Long dataverseId) throws IOException { } else { hdLogger.log(Level.INFO, "(All harvested content already reindexed)"); } - */ + */ } //mailService.sendHarvestNotification(...getSystemEmail(), harvestingDataverse.getName(), logFileName, logTimestamp, harvestErrorOccurred.booleanValue(), harvestedDatasetIds.size(), failedIdentifiers); - } catch (Throwable e) { + } catch (Throwable e) { harvestErrorOccurred.setValue(true); String message = "Exception processing harvest, server= " + harvestingDataverseConfig.getArchiveUrl() + ",format=" + harvestingDataverseConfig.getMetadataPrefix() + " " + e.getClass().getName() + " " + e.getMessage(); hdLogger.log(Level.SEVERE, message); logException(e, hdLogger); hdLogger.log(Level.INFO, "HARVEST NOT COMPLETED DUE TO UNEXPECTED ERROR."); - dataverseService.setHarvestFailure(harvestingDataverse.getId(), harvestedDatasetIds.size(), failedIdentifiers.size()); - - + dataverseService.setHarvestFailure(harvestingDataverse.getId(), harvestStartTime); + } finally { dataverseService.setHarvestInProgress(harvestingDataverse.getId(), false); fileHandler.close(); diff --git a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java index 6a4159d11a8..106eefa5753 100644 --- a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java @@ -81,7 +81,7 @@ public void handleTimeout(javax.ejb.Timer timer) { try { logger.log(Level.INFO, "DO HARVESTING of dataverse " + info.getHarvestingDataverseId()); - harvesterService.doHarvesting(info.getHarvestingDataverseId()); + harvesterService.doHarvest(info.getHarvestingDataverseId()); } catch (Throwable e) { dataverseService.setHarvestResult(info.getHarvestingDataverseId(), harvesterService.HARVEST_RESULT_FAILED); From b6845e067f34fd39c9ddf9674d6acd548296c05a Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 19 Apr 2016 16:43:35 -0400 Subject: [PATCH 06/37] Removed unused imports from the harvester 
service bean. --- .../harvest/client/HarvesterServiceBean.java | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java index deb33234916..1ee6009c975 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java @@ -12,13 +12,8 @@ import edu.harvard.iq.dataverse.HarvestingDataverseConfig; import edu.harvard.iq.dataverse.timer.DataverseTimerServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; -import java.io.BufferedWriter; import java.io.File; -import java.io.FileWriter; import java.io.IOException; -import java.io.Writer; -import java.net.MalformedURLException; -import java.net.URLEncoder; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Calendar; @@ -33,21 +28,15 @@ import javax.ejb.EJB; import javax.ejb.EJBException; import javax.ejb.Stateless; -import javax.ejb.Timeout; import javax.ejb.Timer; import javax.ejb.TransactionAttribute; import javax.ejb.TransactionAttributeType; import javax.faces.bean.ManagedBean; import javax.inject.Named; -import javax.persistence.Query; -import javax.xml.bind.JAXBContext; -import javax.xml.bind.JAXBElement; -import javax.xml.bind.JAXBException; import javax.xml.bind.Unmarshaller; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.TransformerException; import org.apache.commons.lang.mutable.MutableBoolean; -import org.w3c.dom.Document; import org.xml.sax.SAXException; /** @@ -76,7 +65,6 @@ public class HarvesterServiceBean { public static final String HARVEST_RESULT_FAILED="failed"; - private JAXBContext jaxbContext; private Unmarshaller unmarshaller; private long processedSizeThisBatch = 0; @@ -274,7 +262,7 @@ public void doHarvest(Long dataverseId) throws IOException { * @param failedIdentifiers Study Identifiers for failed "GetRecord" requests */ private List harvestOAI(Dataverse dataverse, Logger hdLogger, String from, String until, MutableBoolean harvestErrorOccurred, List failedIdentifiers) - throws IOException, ParserConfigurationException,SAXException, TransformerException, JAXBException { + throws IOException, ParserConfigurationException,SAXException, TransformerException { List harvestedDatasetIds = new ArrayList(); From 340b944e688ba81e9cab093d639d4d056f8dd4a3 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Thu, 21 Apr 2016 21:49:30 -0400 Subject: [PATCH 07/37] new /api/harvest/status method with JSON output. 
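
A rough sketch of the intended usage (the host name and all the sample
values below are illustrative only):

    curl http://localhost:8080/api/harvest/status

    {"remoteArchives": [{"nickname": "...", "dataverseAlias": "...",
        "type": "oai", "schedule": "none", "inProgress": "-",
        "lastResult": "success", "datasetsHarvested": "..."}]}

The keys in the record correspond to the fields assembled by
harvestingConfigAsJson() in Harvesting.java.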
--- .../dataverse/HarvestingDataverseConfig.java | 26 ++++++-- .../harvard/iq/dataverse/api/Harvesting.java | 60 ++++++++++++++++++- 2 files changed, 79 insertions(+), 7 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java index 9d2537375e1..9ef7a49b802 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java @@ -27,15 +27,17 @@ * * @author Leonid Andreev */ -@Entity + @Table(indexes = {@Index(columnList="dataverse_id") , @Index(columnList="harvesttype") , @Index(columnList="harveststyle") , @Index(columnList="harvestingurl")}) +@Entity public class HarvestingDataverseConfig implements Serializable { private static final long serialVersionUID = 1L; + @Id - @GeneratedValue(strategy = GenerationType.AUTO) + @GeneratedValue(strategy = GenerationType.IDENTITY) private Long id; public Long getId() { @@ -45,7 +47,7 @@ public Long getId() { public void setId(Long id) { this.id = id; } - + public static final String HARVEST_TYPE_OAI="oai"; public static final String HARVEST_TYPE_NESSTAR="nesstar"; @@ -83,7 +85,17 @@ public void setDataverse(Dataverse dataverse) { this.dataverse = dataverse; } - String harvestType; + private String name; + + public String getName() { + return name; + } + + public void setName(String name) { + this.name = name; + } + + private String harvestType; public String getHarvestType() { return harvestType; @@ -93,11 +105,12 @@ public void setHarvestType(String harvestType) { this.harvestType = harvestType; } + public boolean isOai() { return HARVEST_TYPE_OAI.equals(harvestType); } - String harvestStyle; + private String harvestStyle; public String getHarvestStyle() { return harvestStyle; @@ -204,8 +217,9 @@ public void setLastSuccessfulHarvestTime(Date lastSuccessfulHarvestTime) { // I.e. 
the last time we ran a harvest that actually resulted in // some Datasets created, updated or deleted: - private Date lastNonEmptyHarvestTime; @Temporal(value = TemporalType.TIMESTAMP) + private Date lastNonEmptyHarvestTime; + public Date getLastNonEmptyHarvestTime() { return lastNonEmptyHarvestTime; } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java b/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java index 7d2b91af87f..818bbef4bbc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java @@ -2,12 +2,20 @@ import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.HarvestingDataverseConfig; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; +import javax.json.JsonObjectBuilder; +import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; +import edu.harvard.iq.dataverse.util.json.JsonPrinter; import java.io.IOException; +import java.math.BigDecimal; +import java.util.List; import javax.ejb.EJB; import javax.ejb.Stateless; +import javax.json.Json; +import javax.json.JsonArrayBuilder; import javax.ws.rs.GET; import javax.ws.rs.Path; import javax.ws.rs.PathParam; @@ -35,7 +43,7 @@ public Response startHarvestingJob(@PathParam("dataverseAlias") String dataverse try { authenticatedUser = findAuthenticatedUserOrDie(); } catch (WrappedResponse wr) { - return wr.getResponse(); + return errorResponse(Response.Status.UNAUTHORIZED, "Authentication required to use this API method"); } if (authenticatedUser == null || !authenticatedUser.isSuperuser()) { @@ -61,5 +69,55 @@ public Response startHarvestingJob(@PathParam("dataverseAlias") String dataverse } return this.accepted(); } + + /* + * /api/harvest/status + * will, by default, return a JSON record with the information about the + * configured remote archives. + * optionally, plain text output will [/may] be provided as well. + */ + @GET + @Path("status") + public Response harvestingStatus() throws IOException { + //return this.accepted(); + + List harvestingDataverses = dataverseService.getAllHarvestedDataverses(); + if (harvestingDataverses == null) { + return okResponse(""); + } + + return okResponse(jsonObjectBuilder().add("remoteArchives", harvestingConfigsAsJsonArray(harvestingDataverses))); + } + public static JsonArrayBuilder harvestingConfigsAsJsonArray(List harvestingDataverses) { + JsonArrayBuilder hdArr = Json.createArrayBuilder(); + + for (Dataverse hd : harvestingDataverses) { + hdArr.add(harvestingConfigAsJson(hd)); + } + return hdArr; + } + + public static JsonObjectBuilder harvestingConfigAsJson(Dataverse dataverse) { + HarvestingDataverseConfig harvestingConfig = dataverse.getHarvestingDataverseConfig(); + if (harvestingConfig == null) { + return null; + } + + return jsonObjectBuilder().add("nickname", harvestingConfig.getName()). + add("dataverseAlias", dataverse.getAlias()). + add("type", harvestingConfig.getHarvestType()). + add("harvestURL", harvestingConfig.getHarvestingUrl()). + add("metadataFormat", harvestingConfig.getMetadataPrefix()). + add("set", harvestingConfig.getHarvestingSet() == null ? "N/A" : harvestingConfig.getHarvestingSet()). + add("schedule", harvestingConfig.isScheduled() ? harvestingConfig.getScheduleDescription() : "none"). + add("inProgress", harvestingConfig.isHarvestingNow() ? "yes" : "-"). 
+ add("lastHarvest", harvestingConfig.getLastHarvestTime() == null ? "N/A" : harvestingConfig.getLastHarvestTime().toString()). + add("lastSuccessful", harvestingConfig.getLastSuccessfulHarvestTime() == null ? "N/A" : harvestingConfig.getLastSuccessfulHarvestTime().toString()). + add("lastNonEmpty", harvestingConfig.getLastNonEmptyHarvestTime() == null ? "N/A" : harvestingConfig.getLastNonEmptyHarvestTime().toString()). + add("lastResult", harvestingConfig.getHarvestResult()). + add("datasetsHarveted", harvestingConfig.getHarvestedDatasetCount() == null ? "N/A" : harvestingConfig.getHarvestedDatasetCount().toString()). + add("datasetsDeleted", harvestingConfig.getDeletedDatasetCount() == null ? "N/A" : harvestingConfig.getDeletedDatasetCount().toString()). + add("datasetsFailed", harvestingConfig.getFailedDatasetCount() == null ? "N/A" : harvestingConfig.getFailedDatasetCount().toString()); + } } From e4b90e5ec078655ba18737a7b6a2599c13efbf8c Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Tue, 26 Apr 2016 19:16:01 -0400 Subject: [PATCH 08/37] Refactored the code, replacing "HarvestingDataverseConfig" with "HarvestingClient". --- .../edu/harvard/iq/dataverse/Dataset.java | 29 ++-- .../harvard/iq/dataverse/DatasetVersion.java | 7 +- .../edu/harvard/iq/dataverse/Dataverse.java | 13 +- .../iq/dataverse/DataverseServiceBean.java | 30 ++-- .../harvard/iq/dataverse/api/Datasets.java | 3 + .../harvard/iq/dataverse/api/Dataverses.java | 3 + .../harvard/iq/dataverse/api/Harvesting.java | 4 +- .../impl/CreateHarvestingClientCommand.java | 33 +++++ .../impl/DeleteHarvestingClientCommand.java | 38 +++++ .../impl/UpdateHarvestingClientCommand.java | 33 +++++ .../harvest/client/HarvesterServiceBean.java | 9 +- .../client/HarvestingClient.java} | 13 +- .../harvest/client/HarvestingClientRun.java | 132 ++++++++++++++++++ .../client/HarvestingClientServiceBean.java | 24 ++++ .../timer/DataverseTimerServiceBean.java | 6 +- 15 files changed, 324 insertions(+), 53 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateHarvestingClientCommand.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteHarvestingClientCommand.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestingClientCommand.java rename src/main/java/edu/harvard/iq/dataverse/{HarvestingDataverseConfig.java => harvest/client/HarvestingClient.java} (96%) create mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientRun.java create mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 6b019ab9a4a..3c0fd8e70d8 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import java.nio.file.Path; import java.nio.file.Paths; import java.text.SimpleDateFormat; @@ -521,24 +522,24 @@ public boolean isHarvested() { public String getRemoteArchiveURL() { if (isHarvested()) { - if (HarvestingDataverseConfig.HARVEST_STYLE_DATAVERSE.equals(this.getOwner().getHarvestingDataverseConfig().getHarvestStyle())) { - return this.getOwner().getHarvestingDataverseConfig().getArchiveUrl() + "/dataset.xhtml?persistentId=" + getGlobalId(); - } else if 
(HarvestingDataverseConfig.HARVEST_STYLE_VDC.equals(this.getOwner().getHarvestingDataverseConfig().getHarvestStyle())) { - String rootArchiveUrl = this.getOwner().getHarvestingDataverseConfig().getHarvestingUrl(); + if (HarvestingClient.HARVEST_STYLE_DATAVERSE.equals(this.getOwner().getHarvestingClientConfig().getHarvestStyle())) { + return this.getOwner().getHarvestingClientConfig().getArchiveUrl() + "/dataset.xhtml?persistentId=" + getGlobalId(); + } else if (HarvestingClient.HARVEST_STYLE_VDC.equals(this.getOwner().getHarvestingClientConfig().getHarvestStyle())) { + String rootArchiveUrl = this.getOwner().getHarvestingClientConfig().getHarvestingUrl(); int c = rootArchiveUrl.indexOf("/OAIHandler"); if (c > 0) { rootArchiveUrl = rootArchiveUrl.substring(0, c); return rootArchiveUrl + "/faces/study/StudyPage.xhtml?globalId=" + getGlobalId(); } - } else if (HarvestingDataverseConfig.HARVEST_STYLE_ICPSR.equals(this.getOwner().getHarvestingDataverseConfig().getHarvestStyle())) { + } else if (HarvestingClient.HARVEST_STYLE_ICPSR.equals(this.getOwner().getHarvestingClientConfig().getHarvestStyle())) { // For the ICPSR, it turns out that the best thing to do is to // rely on the DOI to send the user to the right landing page for // the study: //String icpsrId = identifier; - //return this.getOwner().getHarvestingDataverseConfig().getArchiveUrl() + "/icpsrweb/ICPSR/studies/"+icpsrId+"?q="+icpsrId+"&searchSource=icpsr-landing"; + //return this.getOwner().getHarvestingClientConfig().getArchiveUrl() + "/icpsrweb/ICPSR/studies/"+icpsrId+"?q="+icpsrId+"&searchSource=icpsr-landing"; return "http://doi.org/" + authority + "/" + identifier; - } else if (HarvestingDataverseConfig.HARVEST_STYLE_NESSTAR.equals(this.getOwner().getHarvestingDataverseConfig().getHarvestStyle())) { - String nServerURL = this.getOwner().getHarvestingDataverseConfig().getArchiveUrl(); + } else if (HarvestingClient.HARVEST_STYLE_NESSTAR.equals(this.getOwner().getHarvestingClientConfig().getHarvestStyle())) { + String nServerURL = this.getOwner().getHarvestingClientConfig().getArchiveUrl(); // chop any trailing slashes in the server URL - or they will result // in multiple slashes in the final URL pointing to the study // on server of origin; Nesstar doesn't like it, apparently. @@ -556,9 +557,9 @@ public String getRemoteArchiveURL() { + "&top=yes"; return NesstarWebviewPage; - } else if (HarvestingDataverseConfig.HARVEST_STYLE_ROPER.equals(this.getOwner().getHarvestingDataverseConfig().getHarvestStyle())) { - return this.getOwner().getHarvestingDataverseConfig().getArchiveUrl() + "/CFIDE/cf/action/catalog/abstract.cfm?archno=" + identifier; - } else if (HarvestingDataverseConfig.HARVEST_STYLE_HGL.equals(this.getOwner().getHarvestingDataverseConfig().getHarvestStyle())) { + } else if (HarvestingClient.HARVEST_STYLE_ROPER.equals(this.getOwner().getHarvestingClientConfig().getHarvestStyle())) { + return this.getOwner().getHarvestingClientConfig().getArchiveUrl() + "/CFIDE/cf/action/catalog/abstract.cfm?archno=" + identifier; + } else if (HarvestingClient.HARVEST_STYLE_HGL.equals(this.getOwner().getHarvestingClientConfig().getHarvestStyle())) { // a bit of a hack, true. 
// HGL documents, when turned into Dataverse studies/datasets // all 1 datafile; the location ("storage identifier") of the file @@ -574,9 +575,9 @@ public String getRemoteArchiveURL() { } } } - return this.getOwner().getHarvestingDataverseConfig().getArchiveUrl(); + return this.getOwner().getHarvestingClientConfig().getArchiveUrl(); }else { - return this.getOwner().getHarvestingDataverseConfig().getArchiveUrl(); + return this.getOwner().getHarvestingClientConfig().getArchiveUrl(); } } @@ -585,7 +586,7 @@ public String getRemoteArchiveURL() { public String getHarvestingDescription() { if (isHarvested()) { - return this.getOwner().getHarvestingDataverseConfig().getArchiveDescription(); + return this.getOwner().getHarvestingClientConfig().getArchiveDescription(); } return null; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index 311851eda6b..2d61c2ef53e 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.util.MarkupChecker; import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; import edu.harvard.iq.dataverse.util.StringUtil; @@ -811,9 +812,9 @@ public String getCitation(boolean isOnlineVersion) { // It is always part of the citation for the local datasets; // And for *some* harvested datasets. if (!this.getDataset().isHarvested() - || HarvestingDataverseConfig.HARVEST_STYLE_VDC.equals(this.getDataset().getOwner().getHarvestingDataverseConfig().getHarvestStyle()) - || HarvestingDataverseConfig.HARVEST_STYLE_ICPSR.equals(this.getDataset().getOwner().getHarvestingDataverseConfig().getHarvestStyle()) - || HarvestingDataverseConfig.HARVEST_STYLE_DATAVERSE.equals(this.getDataset().getOwner().getHarvestingDataverseConfig().getHarvestStyle())) { + || HarvestingClient.HARVEST_STYLE_VDC.equals(this.getDataset().getOwner().getHarvestingClientConfig().getHarvestStyle()) + || HarvestingClient.HARVEST_STYLE_ICPSR.equals(this.getDataset().getOwner().getHarvestingClientConfig().getHarvestStyle()) + || HarvestingClient.HARVEST_STYLE_DATAVERSE.equals(this.getDataset().getOwner().getHarvestingClientConfig().getHarvestStyle())) { if (!StringUtil.isEmpty(this.getDataset().getIdentifier())) { if (!StringUtil.isEmpty(str)) { str += ", "; diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index b97d3402f81..c3580f807fe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -1,5 +1,6 @@ package edu.harvard.iq.dataverse; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearch; import java.util.ArrayList; @@ -277,18 +278,18 @@ public void setGuestbooks(List guestbooks) { } @OneToOne (mappedBy="dataverse", cascade={CascadeType.PERSIST, CascadeType.REMOVE}) - private HarvestingDataverseConfig harvestingDataverseConfig; + private HarvestingClient harvestingClient; - public HarvestingDataverseConfig getHarvestingDataverseConfig() { - return this.harvestingDataverseConfig; + public HarvestingClient getHarvestingClientConfig() { + return this.harvestingClient; } - public void 
setHarvestingDataverseConfig(HarvestingDataverseConfig harvestingDataverseConfig) { - this.harvestingDataverseConfig = harvestingDataverseConfig; + public void setHarvestingClientConfig(HarvestingClient harvestingClient) { + this.harvestingClient = harvestingClient; } public boolean isHarvested() { - return harvestingDataverseConfig != null; + return harvestingClient != null; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 33ce58ff250..4a95f79e065 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -511,7 +511,7 @@ public void setHarvestResult(Long hdId, String result) { Dataverse hd = em.find(Dataverse.class, hdId); em.refresh(hd); if (hd.isHarvested()) { - hd.getHarvestingDataverseConfig().setHarvestResult(result); + hd.getHarvestingClientConfig().setHarvestResult(result); } } @@ -520,7 +520,7 @@ public void resetHarvestingStatus(Long hdId) { Dataverse hd = em.find(Dataverse.class, hdId); em.refresh(hd); if (hd.isHarvested()) { - hd.getHarvestingDataverseConfig().setHarvestingNow(false); + hd.getHarvestingClientConfig().setHarvestingNow(false); } } @@ -530,7 +530,7 @@ public void setHarvestInProgress(Long hdId, boolean inProgress) { Dataverse hd = em.find(Dataverse.class, hdId); em.refresh(hd); if (hd.isHarvested()) { - hd.getHarvestingDataverseConfig().setHarvestingNow(inProgress); + hd.getHarvestingClientConfig().setHarvestingNow(inProgress); } } @@ -539,7 +539,7 @@ public void setLastHarvestTime(Long hdId, Date lastHarvestTime) { Dataverse hd = em.find(Dataverse.class, hdId); em.refresh(hd); if (hd.isHarvested()) { - hd.getHarvestingDataverseConfig().setLastHarvestTime(lastHarvestTime); + hd.getHarvestingClientConfig().setLastHarvestTime(lastHarvestTime); } } @@ -548,14 +548,14 @@ public void setHarvestSuccess(Long hdId, Date currentTime, int harvestedCount, i Dataverse hd = em.find(Dataverse.class, hdId); em.refresh(hd); if (hd.isHarvested()) { - hd.getHarvestingDataverseConfig().setLastHarvestTime(currentTime); - hd.getHarvestingDataverseConfig().setLastSuccessfulHarvestTime(currentTime); - hd.getHarvestingDataverseConfig().setHarvestResult(edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.HARVEST_RESULT_SUCCESS); + hd.getHarvestingClientConfig().setLastHarvestTime(currentTime); + hd.getHarvestingClientConfig().setLastSuccessfulHarvestTime(currentTime); + hd.getHarvestingClientConfig().setHarvestResult(edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.HARVEST_RESULT_SUCCESS); if (harvestedCount > 0 || failedCount > 0) { - hd.getHarvestingDataverseConfig().setLastNonEmptyHarvestTime(currentTime); - hd.getHarvestingDataverseConfig().setHarvestedDatasetCount(new Long(harvestedCount)); - hd.getHarvestingDataverseConfig().setFailedDatasetCount(new Long(failedCount)); + hd.getHarvestingClientConfig().setLastNonEmptyHarvestTime(currentTime); + hd.getHarvestingClientConfig().setHarvestedDatasetCount(new Long(harvestedCount)); + hd.getHarvestingClientConfig().setFailedDatasetCount(new Long(failedCount)); /*TODO: record the number of deleted datasets! 
*/ } } @@ -567,9 +567,9 @@ public void setHarvestSuccessNotEmpty(Long hdId, Date currentTime, int harvested em.refresh(hd); if (hd.isHarvested()) { /* TODO: - hd.getHarvestingDataverseConfig().setLastSuccessfulNonZeroHarvestTime(currentTime); - hd.getHarvestingDataverseConfig().setHarvestedStudyCountNonZero(new Long(harvestedCount)); - hd.getHarvestingDataverseConfig().setFailedStudyCountNonZero(new Long(failedCount)); + hd.getHarvestingClientConfig().setLastSuccessfulNonZeroHarvestTime(currentTime); + hd.getHarvestingClientConfig().setHarvestedStudyCountNonZero(new Long(harvestedCount)); + hd.getHarvestingClientConfig().setFailedStudyCountNonZero(new Long(failedCount)); * } }*/ @@ -579,8 +579,8 @@ public void setHarvestFailure(Long hdId, Date currentTime) { Dataverse hd = em.find(Dataverse.class, hdId); em.refresh(hd); if (hd.isHarvested()) { - hd.getHarvestingDataverseConfig().setLastHarvestTime(currentTime); - hd.getHarvestingDataverseConfig().setHarvestResult(edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.HARVEST_RESULT_FAILED); + hd.getHarvestingClientConfig().setLastHarvestTime(currentTime); + hd.getHarvestingClientConfig().setHarvestResult(edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.HARVEST_RESULT_FAILED); } } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 7f5f8a9533c..e8e77d7dc1f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -57,6 +57,9 @@ import javax.ws.rs.QueryParam; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; +import static edu.harvard.iq.dataverse.api.AbstractApiBean.errorResponse; +import static edu.harvard.iq.dataverse.api.AbstractApiBean.errorResponse; +import static edu.harvard.iq.dataverse.api.AbstractApiBean.errorResponse; @Path("datasets") public class Datasets extends AbstractApiBean { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index a787f3e26ee..ab8ce890332 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -79,6 +79,9 @@ import javax.ws.rs.core.Response; import javax.ws.rs.core.Response.Status; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; +import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; /** * A REST API for dataverses. 
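(Editorial sketch: the harvest-status setters retouched above — setHarvestResult, setHarvestInProgress, setHarvestSuccess, setHarvestFailure — are all annotated @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW), so each status update commits in its own transaction and stays visible even if the long-running harvest that calls it later rolls back. A minimal sketch of that pattern, using a hypothetical JobRecord entity in place of the Dataverse/harvesting config pair:

    import javax.ejb.Stateless;
    import javax.ejb.TransactionAttribute;
    import javax.ejb.TransactionAttributeType;
    import javax.persistence.EntityManager;
    import javax.persistence.PersistenceContext;

    @Stateless
    public class JobStatusBean {
        @PersistenceContext
        private EntityManager em;

        // REQUIRES_NEW suspends the caller's transaction and opens a fresh one;
        // the status change commits as soon as this method returns, so it
        // survives even if the caller's own transaction rolls back later.
        @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
        public void markFailed(Long jobId) {
            JobRecord job = em.find(JobRecord.class, jobId); // JobRecord is hypothetical
            if (job != null) {
                job.setStatus("FAILED"); // flushed and committed at method exit
            }
        }
    }

The trade-off is the same one the harvester relies on: progress and failure markers must be durable independently of the outcome of the harvest itself.)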
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java b/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java index 818bbef4bbc..c279201a0b7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java @@ -2,7 +2,7 @@ import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.HarvestingDataverseConfig; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; @@ -99,7 +99,7 @@ public static JsonArrayBuilder harvestingConfigsAsJsonArray(List harv } public static JsonObjectBuilder harvestingConfigAsJson(Dataverse dataverse) { - HarvestingDataverseConfig harvestingConfig = dataverse.getHarvestingDataverseConfig(); + HarvestingClient harvestingConfig = dataverse.getHarvestingClientConfig(); if (harvestingConfig == null) { return null; } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateHarvestingClientCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateHarvestingClientCommand.java new file mode 100644 index 00000000000..c4d09b2ef6e --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateHarvestingClientCommand.java @@ -0,0 +1,33 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; + +/** + * + * @author Leonid Andreev + */ +@RequiredPermissions( Permission.EditDataverse ) +public class CreateHarvestingClientCommand extends AbstractCommand { + + private final Dataverse dv; + + public CreateHarvestingClientCommand(DataverseRequest aRequest, Dataverse motherDataverse) { + super(aRequest, motherDataverse); + dv = motherDataverse; + } + + @Override + public HarvestingClient execute(CommandContext ctxt) throws CommandException { + // TODO: check if the harvesting client config is legit; + // and that it is new. 
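// (One way the TODO above might be satisfied — an illustrative sketch only,
// not part of this patch; it reuses IllegalCommandException the same way
// DeleteHarvestingClientCommand below does:
//
//     HarvestingClient existing = dv.getHarvestingClientConfig();
//     if (existing != null && existing.getId() != null) {
//         throw new IllegalCommandException("Dataverse " + dv.getAlias()
//                 + " already has a harvesting client configured", this);
//     }
// )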
+ return ctxt.dataverses().save(dv).getHarvestingClientConfig(); + } + +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteHarvestingClientCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteHarvestingClientCommand.java new file mode 100644 index 00000000000..38999fb45c8 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteHarvestingClientCommand.java @@ -0,0 +1,38 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; + +/** + * + * @author Leonid Andreev + */ +@RequiredPermissions( Permission.EditDataverse ) +public class DeleteHarvestingClientCommand extends AbstractVoidCommand { + + private final Dataverse motherDataverse; + + public DeleteHarvestingClientCommand(DataverseRequest aRequest, Dataverse motherDataverse) { + super(aRequest, motherDataverse); + this.motherDataverse = motherDataverse; + } + + @Override + public void executeImpl(CommandContext ctxt) throws CommandException { + HarvestingClient harvestingClient = motherDataverse.getHarvestingClientConfig(); + if (harvestingClient == null) { + throw new IllegalCommandException("No harvesting client is configured for dataverse "+motherDataverse.getAlias(), this); + } + motherDataverse.setHarvestingClientConfig(null); + ctxt.em().remove(harvestingClient); + ctxt.em().merge(motherDataverse); + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestingClientCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestingClientCommand.java new file mode 100644 index 00000000000..77c8bf75e57 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestingClientCommand.java @@ -0,0 +1,33 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; + +/** + * + * @author Leonid Andreev + */ +@RequiredPermissions( Permission.EditDataverse ) +public class UpdateHarvestingClientCommand extends AbstractCommand { + + private final Dataverse dv; + + public UpdateHarvestingClientCommand(DataverseRequest aRequest, Dataverse motherDataverse) { + super(aRequest, motherDataverse); + dv = motherDataverse; + } + + @Override + public HarvestingClient execute(CommandContext ctxt) throws CommandException { + // TODO: check if the harvesting client config attached to the dataverse + // is legit; and that it already exists. 
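// (Similarly, the existence check this TODO asks for could mirror the guard
// already used in DeleteHarvestingClientCommand above — illustrative only:
//
//     if (dv.getHarvestingClientConfig() == null) {
//         throw new IllegalCommandException(
//                 "No harvesting client is configured for dataverse " + dv.getAlias(), this);
//     }
// )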
+ return ctxt.em().merge(dv).getHarvestingClientConfig(); + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java index 1ee6009c975..e647f287db7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java @@ -9,7 +9,6 @@ import edu.harvard.iq.dataverse.DatasetServiceBean; import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.HarvestingDataverseConfig; import edu.harvard.iq.dataverse.timer.DataverseTimerServiceBean; import edu.harvard.iq.dataverse.util.FileUtil; import java.io.File; @@ -93,7 +92,7 @@ public void createScheduledHarvestTimers() { List dataverses = dataverseService.getAllHarvestedDataverses(); for (Iterator it = dataverses.iterator(); it.hasNext();) { Dataverse dataverse = (Dataverse) it.next(); - HarvestingDataverseConfig harvestingConfig = dataverse.getHarvestingDataverseConfig(); + HarvestingClient harvestingConfig = dataverse.getHarvestingClientConfig(); if (harvestingConfig == null) { logger.warning("ERROR: no harvesting config found for dataverse id="+dataverse.getId()); } else if (harvestingConfig.isScheduled()) { @@ -125,7 +124,7 @@ public List getHarvestTimers() { } private void createHarvestTimer(Dataverse harvestingDataverse) { - HarvestingDataverseConfig harvestingDataverseConfig = harvestingDataverse.getHarvestingDataverseConfig(); + HarvestingClient harvestingDataverseConfig = harvestingDataverse.getHarvestingClientConfig(); if (harvestingDataverseConfig == null) { logger.info("ERROR: No Harvesting Configuration found for dataverse id="+harvestingDataverse.getId()); @@ -171,7 +170,7 @@ public void doHarvest(Long dataverseId) throws IOException { throw new IOException("No such Dataverse: id="+dataverseId); } - HarvestingDataverseConfig harvestingDataverseConfig = harvestingDataverse.getHarvestingDataverseConfig(); + HarvestingClient harvestingDataverseConfig = harvestingDataverse.getHarvestingClientConfig(); if (harvestingDataverseConfig == null) { throw new IOException("Could not find Harvesting Config for Dataverse id="+dataverseId); @@ -373,7 +372,7 @@ public Long getRecord(HarvestingDataverse dataverse, String identifier, String m public Long getRecord(Logger hdLogger, Dataverse dataverse, String identifier, String metadataPrefix, MutableBoolean recordErrorOccurred) { String errMessage = null; - HarvestingDataverseConfig harvestingConfig = dataverse.getHarvestingDataverseConfig(); + HarvestingClient harvestingConfig = dataverse.getHarvestingClientConfig(); if (harvestingConfig == null) { errMessage = "Could not find Harvesting Config for Dataverse id="+dataverse.getId(); diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java similarity index 96% rename from src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java rename to src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java index 9ef7a49b802..98d9ac8b3cb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java @@ -3,8 +3,9 @@ * To change this template file, choose Tools | Templates * and open the template in the editor. 
*/ -package edu.harvard.iq.dataverse; +package edu.harvard.iq.dataverse.harvest.client; +import edu.harvard.iq.dataverse.Dataverse; import java.io.Serializable; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; import java.util.GregorianCalendar; @@ -33,7 +34,7 @@ , @Index(columnList="harveststyle") , @Index(columnList="harvestingurl")}) @Entity -public class HarvestingDataverseConfig implements Serializable { +public class HarvestingClient implements Serializable { private static final long serialVersionUID = 1L; @Id @@ -67,7 +68,7 @@ public void setId(Long id) { public static final String SCHEDULE_PERIOD_DAILY="daily"; public static final String SCHEDULE_PERIOD_WEEKLY="weekly"; - public HarvestingDataverseConfig() { + public HarvestingClient() { this.harvestType = HARVEST_TYPE_OAI; // default harvestType this.harvestStyle = HARVEST_STYLE_DATAVERSE; // default harvestStyle } @@ -171,6 +172,7 @@ public void setMetadataPrefix(String metadataPrefix) { this.metadataPrefix = metadataPrefix; } + /* move the fields below to the new HarvestingClientRun class: */ private String harvestResult; public String getHarvestResult() { @@ -256,6 +258,7 @@ public Long getDeletedDatasetCount() { public void setDeletedDatasetCount(Long deletedDatasetCount) { this.deletedDatasetCount = deletedDatasetCount; } + /**/ private boolean scheduled; @@ -336,10 +339,10 @@ public int hashCode() { @Override public boolean equals(Object object) { // TODO: Warning - this method won't work in the case the id fields are not set - if (!(object instanceof HarvestingDataverseConfig)) { + if (!(object instanceof HarvestingClient)) { return false; } - HarvestingDataverseConfig other = (HarvestingDataverseConfig) object; + HarvestingClient other = (HarvestingClient) object; if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { return false; } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientRun.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientRun.java new file mode 100644 index 00000000000..404ace6cd42 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientRun.java @@ -0,0 +1,132 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. + */ +package edu.harvard.iq.dataverse.harvest.client; + +import java.io.Serializable; +import java.util.Date; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import javax.persistence.Temporal; +import javax.persistence.TemporalType; + +/** + * + * @author Leonid Andreev + * + * This is a record of an attempted harvesting client run. + * (Should it be named HarvestingClientRunResult instead?) + */ +@Entity +public class HarvestingClientRun implements Serializable { + + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.AUTO) + private Long id; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + private String harvestResult; // TODO: should this be an enum instead? -- L.A.
4.4 + + public String getHarvestResult() { + return harvestResult; + } + + public void setHarvestResult(String harvestResult) { + this.harvestResult = harvestResult; + } + + // Time of this harvest attempt: + + @Temporal(value = TemporalType.TIMESTAMP) + private Date startTime; + + public Date getStartTime() { + return startTime; + } + + public void setStartTime(Date startTime) { + this.startTime = startTime; + } + + @Temporal(value = TemporalType.TIMESTAMP) + private Date finishTime; + + public Date getFinishTime() { + return finishTime; + } + + public void setFinishTime(Date finishTime) { + this.finishTime = finishTime; + } + + + // These are the Dataset counts from that last harvest: + // (TODO: do we need to differentiate between *created* (new), and *updated* + // harvested datasets? -- L.A. 4.4) + + private Long harvestedDatasetCount; + private Long failedDatasetCount; + private Long deletedDatasetCount; + + public Long getHarvestedDatasetCount() { + return harvestedDatasetCount; + } + + public void setHarvestedDatasetCount(Long harvestedDatasetCount) { + this.harvestedDatasetCount = harvestedDatasetCount; + } + + public Long getFailedDatasetCount() { + return failedDatasetCount; + } + + public void setFailedDatasetCount(Long failedDatasetCount) { + this.failedDatasetCount = failedDatasetCount; + } + + public Long getDeletedDatasetCount() { + return deletedDatasetCount; + } + + public void setDeletedDatasetCount(Long deletedDatasetCount) { + this.deletedDatasetCount = deletedDatasetCount; + } + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof HarvestingClientRun)) { + return false; + } + HarvestingClientRun other = (HarvestingClientRun) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.harvest.client.HarvestingClientRun[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java new file mode 100644 index 00000000000..7783e64e8ff --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java @@ -0,0 +1,24 @@ +package edu.harvard.iq.dataverse.harvest.client; + +import edu.harvard.iq.dataverse.DataverseServiceBean; +import java.util.logging.Logger; +import javax.ejb.EJB; +import javax.ejb.Stateless; +import javax.faces.bean.ManagedBean; +import javax.inject.Named; + +/** + * + * @author Leonid Andreev + * + * Dedicated service for managing Harvesting Client Configurations + */ +@Stateless(name = "harvesterService") +@Named +@ManagedBean +public class HarvestingClientServiceBean { + @EJB + DataverseServiceBean dataverseService; + + private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.HarvestingClientServiceBean"); +} diff --git a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java index 106eefa5753..f438bd10c14 100644 --- a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java +++
b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java @@ -7,7 +7,7 @@ import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.HarvestingDataverseConfig; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.harvest.client.HarvestTimerInfo; import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; import java.io.Serializable; @@ -129,7 +129,7 @@ public void removeHarvestTimers() { } private void createHarvestTimer(Dataverse harvestedDataverse) { - HarvestingDataverseConfig harvestedDataverseConfig = harvestedDataverse.getHarvestingDataverseConfig(); + HarvestingClient harvestedDataverseConfig = harvestedDataverse.getHarvestingClientConfig(); if (harvestedDataverseConfig == null) { logger.info("ERROR: No Harvesting Configuration found for dataverse id="+harvestedDataverse.getId()); @@ -141,7 +141,7 @@ private void createHarvestTimer(Dataverse harvestedDataverse) { Calendar initExpiration = Calendar.getInstance(); initExpiration.set(Calendar.MINUTE, 0); initExpiration.set(Calendar.SECOND, 0); - if (harvestedDataverseConfig.getSchedulePeriod().equals(HarvestingDataverseConfig.SCHEDULE_PERIOD_DAILY)) { + if (harvestedDataverseConfig.getSchedulePeriod().equals(HarvestingClient.SCHEDULE_PERIOD_DAILY)) { intervalDuration = 1000 * 60 * 60 * 24; initExpiration.set(Calendar.HOUR_OF_DAY, harvestedDataverseConfig.getScheduleHourOfDay()); From 6315e1924a9470b3df4bd03e6016960c9b5d85fb Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 27 Apr 2016 16:10:31 -0400 Subject: [PATCH 09/37] more refactoring; added a Command for retrieving a HarvestingClient; more fixes; --- .../iq/dataverse/DataverseServiceBean.java | 78 ---------- .../harvard/iq/dataverse/api/Harvesting.java | 10 +- .../impl/GetHarvestingClientCommand.java | 51 +++++++ ...ngClientRun.java => ClientHarvestRun.java} | 104 ++++++++++---- .../harvest/client/HarvesterServiceBean.java | 41 +++--- .../harvest/client/HarvestingClient.java | 134 ++++++++++++++++-- .../client/HarvestingClientServiceBean.java | 114 ++++++++++++++- .../timer/DataverseTimerServiceBean.java | 14 +- 8 files changed, 408 insertions(+), 138 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetHarvestingClientCommand.java rename src/main/java/edu/harvard/iq/dataverse/harvest/client/{HarvestingClientRun.java => ClientHarvestRun.java} (56%) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 4a95f79e065..003df01f87b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -505,82 +505,4 @@ public void populateDvSearchCard(SolrSearchResult solrSearchResult) { } } } - - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void setHarvestResult(Long hdId, String result) { - Dataverse hd = em.find(Dataverse.class, hdId); - em.refresh(hd); - if (hd.isHarvested()) { - hd.getHarvestingClientConfig().setHarvestResult(result); - } - } - - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void resetHarvestingStatus(Long hdId) { - Dataverse hd = em.find(Dataverse.class, hdId); - em.refresh(hd); - if (hd.isHarvested()) { - hd.getHarvestingClientConfig().setHarvestingNow(false); - } - - } - - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void
setHarvestInProgress(Long hdId, boolean inProgress) { - Dataverse hd = em.find(Dataverse.class, hdId); - em.refresh(hd); - if (hd.isHarvested()) { - hd.getHarvestingClientConfig().setHarvestingNow(inProgress); - } - } - - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void setLastHarvestTime(Long hdId, Date lastHarvestTime) { - Dataverse hd = em.find(Dataverse.class, hdId); - em.refresh(hd); - if (hd.isHarvested()) { - hd.getHarvestingClientConfig().setLastHarvestTime(lastHarvestTime); - } - } - - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void setHarvestSuccess(Long hdId, Date currentTime, int harvestedCount, int failedCount) { - Dataverse hd = em.find(Dataverse.class, hdId); - em.refresh(hd); - if (hd.isHarvested()) { - hd.getHarvestingClientConfig().setLastHarvestTime(currentTime); - hd.getHarvestingClientConfig().setLastSuccessfulHarvestTime(currentTime); - hd.getHarvestingClientConfig().setHarvestResult(edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.HARVEST_RESULT_SUCCESS); - - if (harvestedCount > 0 || failedCount > 0) { - hd.getHarvestingClientConfig().setLastNonEmptyHarvestTime(currentTime); - hd.getHarvestingClientConfig().setHarvestedDatasetCount(new Long(harvestedCount)); - hd.getHarvestingClientConfig().setFailedDatasetCount(new Long(failedCount)); - /*TODO: record the number of deleted datasets! */ - } - } - } -/* - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void setHarvestSuccessNotEmpty(Long hdId, Date currentTime, int harvestedCount, int failedCount) { - Dataverse hd = em.find(Dataverse.class, hdId); - em.refresh(hd); - if (hd.isHarvested()) { - /* TODO: - hd.getHarvestingClientConfig().setLastSuccessfulNonZeroHarvestTime(currentTime); - hd.getHarvestingClientConfig().setHarvestedStudyCountNonZero(new Long(harvestedCount)); - hd.getHarvestingClientConfig().setFailedStudyCountNonZero(new Long(failedCount)); - * - } - }*/ - - @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) - public void setHarvestFailure(Long hdId, Date currentTime) { - Dataverse hd = em.find(Dataverse.class, hdId); - em.refresh(hd); - if (hd.isHarvested()) { - hd.getHarvestingClientConfig().setLastHarvestTime(currentTime); - hd.getHarvestingClientConfig().setHarvestResult(edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean.HARVEST_RESULT_FAILED); - } - } } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java b/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java index c279201a0b7..444bed6e390 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java @@ -111,13 +111,13 @@ public static JsonObjectBuilder harvestingConfigAsJson(Dataverse dataverse) { add("metadataFormat", harvestingConfig.getMetadataPrefix()). add("set", harvestingConfig.getHarvestingSet() == null ? "N/A" : harvestingConfig.getHarvestingSet()). add("schedule", harvestingConfig.isScheduled() ? harvestingConfig.getScheduleDescription() : "none"). - add("inProgress", harvestingConfig.isHarvestingNow() ? "yes" : "-"). + add("status", harvestingConfig.isHarvestingNow() ? "inProgress" : "inActive"). add("lastHarvest", harvestingConfig.getLastHarvestTime() == null ? "N/A" : harvestingConfig.getLastHarvestTime().toString()). + add("lastResult", harvestingConfig.getLastResult()). add("lastSuccessful", harvestingConfig.getLastSuccessfulHarvestTime() == null ? "N/A" : harvestingConfig.getLastSuccessfulHarvestTime().toString()). 
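// (Note on this hunk: after the refactoring the "last*" values are no longer
// stored columns on the client config — HarvestingClient now derives them
// from its run history, via the getLastRun()/getLastSuccessfulRun()/
// getLastNonEmptyRun() helpers added further down in this patch.)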
add("lastNonEmpty", harvestingConfig.getLastNonEmptyHarvestTime() == null ? "N/A" : harvestingConfig.getLastNonEmptyHarvestTime().toString()). - add("lastResult", harvestingConfig.getHarvestResult()). - add("datasetsHarveted", harvestingConfig.getHarvestedDatasetCount() == null ? "N/A" : harvestingConfig.getHarvestedDatasetCount().toString()). - add("datasetsDeleted", harvestingConfig.getDeletedDatasetCount() == null ? "N/A" : harvestingConfig.getDeletedDatasetCount().toString()). - add("datasetsFailed", harvestingConfig.getFailedDatasetCount() == null ? "N/A" : harvestingConfig.getFailedDatasetCount().toString()); + add("lastDatasetsHarvested", harvestingConfig.getLastHarvestedDatasetCount() == null ? "N/A" : harvestingConfig.getLastHarvestedDatasetCount().toString()). + add("lastDatasetsDeleted", harvestingConfig.getLastDeletedDatasetCount() == null ? "N/A" : harvestingConfig.getLastDeletedDatasetCount().toString()). + add("lastDatasetsFailed", harvestingConfig.getLastFailedDatasetCount() == null ? "N/A" : harvestingConfig.getLastFailedDatasetCount().toString()); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetHarvestingClientCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetHarvestingClientCommand.java new file mode 100644 index 00000000000..d0929e2ec1a --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetHarvestingClientCommand.java @@ -0,0 +1,51 @@ +package edu.harvard.iq.dataverse.engine.command.impl; + +import edu.harvard.iq.dataverse.Dataverse; +import edu.harvard.iq.dataverse.authorization.Permission; +import edu.harvard.iq.dataverse.engine.command.AbstractCommand; +import edu.harvard.iq.dataverse.engine.command.CommandContext; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; +import edu.harvard.iq.dataverse.engine.command.exception.CommandException; +import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; +import java.util.Collections; +import java.util.Map; +import java.util.Set; + +/** + * + * @author Leonid Andreev + */ +// One can view the configuration of a Harvesting Client if and only if +// they have the permission to view the dataverse that owns the harvesting +// client. And for a Dataverse, we cannot define the permission with a +// @RequiredPermission annotation - because the decision has to be made dynamically: +// Everybody can view a published Dataverse; otherwise, an explicit +// ViewUnpublishedDataverse is needed. +// This is defined in the getRequiredPermissions() method, below. +public class GetHarvestingClientCommand extends AbstractCommand{ + private final Dataverse ownerDataverse; + + public GetHarvestingClientCommand(DataverseRequest aRequest, Dataverse ownerDataverse) { + super(aRequest, ownerDataverse); + this.ownerDataverse = ownerDataverse; + } + + @Override + public HarvestingClient execute(CommandContext ctxt) throws CommandException { + if (ownerDataverse == null) { + throw new IllegalCommandException("GetHarvestingClientCommand called on a null dataverse object", this); + } + if (ownerDataverse.getHarvestingClientConfig() == null) { + throw new IllegalCommandException("No harvesting client is configured for dataverse "+ownerDataverse.getAlias(), this); + } + return ownerDataverse.getHarvestingClientConfig(); + } + + @Override + public Map> getRequiredPermissions() { + return Collections.singletonMap("", + ownerDataverse.isReleased() ? 
Collections.<Permission>emptySet() + : Collections.singleton(Permission.ViewUnpublishedDataverse)); + } +} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientRun.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/ClientHarvestRun.java similarity index 56% rename from src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientRun.java rename to src/main/java/edu/harvard/iq/dataverse/harvest/client/ClientHarvestRun.java index 404ace6cd42..7cd8c4e603d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientRun.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/ClientHarvestRun.java @@ -11,20 +11,23 @@ import javax.persistence.Entity; import javax.persistence.GeneratedValue; import javax.persistence.GenerationType; import javax.persistence.Id; +import javax.persistence.JoinColumn; +import javax.persistence.ManyToOne; import javax.persistence.Temporal; import javax.persistence.TemporalType; /** * * @author Leonid Andreev - * - * This is a record of an attempted harvesting client run. - * (Should it be named HarvestingClientRunResult instead?) + * + * This is a record of an attempted harvesting client run. (Should it be named + * HarvestingClientRunResult instead?) */ @Entity -public class HarvestingClientRun implements Serializable { +public class ClientHarvestRun implements Serializable { private static final long serialVersionUID = 1L; + @Id @GeneratedValue(strategy = GenerationType.AUTO) private Long id; public Long getId() { return id; } public void setId(Long id) { this.id = id; } - private String harvestResult; // TODO: should this be an enum instead? -- L.A. 4.4 + public enum RunResultType { SUCCESS, FAILURE, INPROGRESS }; + + private static String RESULT_LABEL_SUCCESS = "SUCCESS"; + private static String RESULT_LABEL_FAILURE = "FAILED"; + private static String RESULT_LABEL_INPROGRESS = "INPROGRESS"; + @ManyToOne + @JoinColumn(nullable = false) + private HarvestingClient harvestingClient; + + public HarvestingClient getHarvestingClient() { + return harvestingClient; + } + + public void setHarvestingClient(HarvestingClient harvestingClient) { + this.harvestingClient = harvestingClient; + } + + private RunResultType harvestResult; + + public RunResultType getResult() { return harvestResult; } + + public String getResultLabel() { + if (isSuccess()) { + return RESULT_LABEL_SUCCESS; + } else if (isFailed()) { + return RESULT_LABEL_FAILURE; + } else if (isInProgress()) { + return RESULT_LABEL_INPROGRESS; + } + return null; + } - public void setHarvestResult(String harvestResult) { + public void setResult(RunResultType harvestResult) { this.harvestResult = harvestResult; } + + public boolean isSuccess() { + return RunResultType.SUCCESS == harvestResult; + } + + public void setSuccess() { + harvestResult = RunResultType.SUCCESS; + } + + public boolean isFailed() { + return RunResultType.FAILURE == harvestResult; + } + + public void setFailed() { + harvestResult = RunResultType.FAILURE; + } - // Time of this harvest attempt: + public boolean isInProgress() { + return RunResultType.INPROGRESS == harvestResult || + (harvestResult == null && startTime != null && finishTime == null); + } + public void setInProgress() { + harvestResult = RunResultType.INPROGRESS; + } + + // Time of this harvest attempt: @Temporal(value = TemporalType.TIMESTAMP) private Date startTime; public Date getStartTime() { return startTime; } public void setStartTime(Date startTime) {
this.startTime = startTime; } - + @Temporal(value = TemporalType.TIMESTAMP) - private Date finishTime; - + private Date finishTime; + public Date getFinishTime() { return finishTime; } @@ -70,16 +126,14 @@ public Date getFinishTime() { public void setFinishTime(Date finishTime) { this.finishTime = finishTime; } - - + // These are the Dataset counts from that last harvest: // (TODO: do we need to differentiate between *created* (new), and *updated* // harvested datasets? -- L.A. 4.4) - - private Long harvestedDatasetCount; - private Long failedDatasetCount; - private Long deletedDatasetCount; - + private Long harvestedDatasetCount = 0L; + private Long failedDatasetCount = 0L; + private Long deletedDatasetCount = 0L; + public Long getHarvestedDatasetCount() { return harvestedDatasetCount; } @@ -87,7 +141,7 @@ public Long getHarvestedDatasetCount() { public void setHarvestedDatasetCount(Long harvestedDatasetCount) { this.harvestedDatasetCount = harvestedDatasetCount; } - + public Long getFailedDatasetCount() { return failedDatasetCount; } @@ -95,7 +149,7 @@ public Long getFailedDatasetCount() { public void setFailedDatasetCount(Long failedDatasetCount) { this.failedDatasetCount = failedDatasetCount; } - + public Long getDeletedDatasetCount() { return deletedDatasetCount; } @@ -103,7 +157,7 @@ public Long getDeletedDatasetCount() { public void setDeletedDatasetCount(Long deletedDatasetCount) { this.deletedDatasetCount = deletedDatasetCount; } - + @Override public int hashCode() { int hash = 0; @@ -114,10 +168,10 @@ public int hashCode() { @Override public boolean equals(Object object) { // TODO: Warning - this method won't work in the case the id fields are not set - if (!(object instanceof HarvestingClientRun)) { + if (!(object instanceof ClientHarvestRun)) { return false; } - HarvestingClientRun other = (HarvestingClientRun) object; + ClientHarvestRun other = (ClientHarvestRun) object; if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { return false; } @@ -128,5 +182,5 @@ public boolean equals(Object object) { public String toString() { return "edu.harvard.iq.dataverse.harvest.client.HarvestingClientRun[ id=" + id + " ]"; } - + } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java index e647f287db7..24cf96fbd65 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java @@ -54,9 +54,10 @@ public class HarvesterServiceBean { javax.ejb.TimerService timerService; @EJB DataverseTimerServiceBean dataverseTimerService; + @EJB + HarvestingClientServiceBean harvestingClientService; private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean"); - private static final String HARVEST_TIMER = "HarvestTimer"; private static final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); @@ -64,8 +65,6 @@ public class HarvesterServiceBean { public static final String HARVEST_RESULT_FAILED="failed"; - private Unmarshaller unmarshaller; - private long processedSizeThisBatch = 0; private List harvestedDatasetIdsThisBatch = null; public HarvesterServiceBean() { @@ -170,9 +169,9 @@ public void doHarvest(Long dataverseId) throws IOException { throw new IOException("No such
Dataverse: id="+dataverseId); } - HarvestingClient harvestingDataverseConfig = harvestingDataverse.getHarvestingClientConfig(); + HarvestingClient harvestingClientConfig = harvestingDataverse.getHarvestingClientConfig(); - if (harvestingDataverseConfig == null) { + if (harvestingClientConfig == null) { throw new IOException("Could not find Harvesting Config for Dataverse id="+dataverseId); } @@ -191,37 +190,36 @@ public void doHarvest(Long dataverseId) throws IOException { Date harvestStartTime = new Date(); try { - boolean harvestingNow = harvestingDataverseConfig.isHarvestingNow(); + boolean harvestingNow = harvestingClientConfig.isHarvestingNow(); if (harvestingNow) { harvestErrorOccurred.setValue(true); hdLogger.log(Level.SEVERE, "Cannot begin harvesting, Dataverse " + harvestingDataverse.getName() + " is currently being harvested."); } else { - dataverseService.resetHarvestingStatus(harvestingDataverse.getId()); + harvestingClientService.resetHarvestInProgress(harvestingDataverse.getId()); String until = null; // If we don't set until date, we will get all the changes since the last harvest. String from = null; - Date lastSuccessfulHarvestTime = harvestingDataverseConfig.getLastSuccessfulHarvestTime(); + // TODO: should it be last *non-empty* time? -- L.A. 4.4 + Date lastSuccessfulHarvestTime = harvestingClientConfig.getLastSuccessfulHarvestTime(); if (lastSuccessfulHarvestTime != null) { from = formatter.format(lastSuccessfulHarvestTime); } - dataverseService.setHarvestInProgress(harvestingDataverse.getId(), true); - - dataverseService.setLastHarvestTime(harvestingDataverse.getId(), harvestStartTime); + harvestingClientService.setHarvestInProgress(harvestingDataverse.getId(), harvestStartTime); - hdLogger.log(Level.INFO, "BEGIN HARVEST..., oaiUrl=" + harvestingDataverseConfig.getArchiveUrl() + ",set=" + harvestingDataverseConfig.getHarvestingSet() + ", metadataPrefix=" + harvestingDataverseConfig.getMetadataPrefix() + ", from=" + from + ", until=" + until); + hdLogger.log(Level.INFO, "BEGIN HARVEST..., oaiUrl=" + harvestingClientConfig.getArchiveUrl() + ",set=" + harvestingClientConfig.getHarvestingSet() + ", metadataPrefix=" + harvestingClientConfig.getMetadataPrefix() + ", from=" + from + ", until=" + until); - if (harvestingDataverseConfig.isOai()) { + if (harvestingClientConfig.isOai()) { harvestedDatasetIds = harvestOAI(harvestingDataverse, hdLogger, from, until, harvestErrorOccurred, failedIdentifiers); } else { throw new IOException("Unsupported harvest type"); } - dataverseService.setHarvestSuccess(harvestingDataverse.getId(), harvestStartTime, harvestedDatasetIds.size(), failedIdentifiers.size()); - hdLogger.log(Level.INFO, "COMPLETED HARVEST, server=" + harvestingDataverseConfig.getArchiveUrl() + ", metadataPrefix=" + harvestingDataverseConfig.getMetadataPrefix()); + harvestingClientService.setHarvestSuccess(harvestingDataverse.getId(), new Date(), harvestedDatasetIds.size(), failedIdentifiers.size()); + hdLogger.log(Level.INFO, "COMPLETED HARVEST, server=" + harvestingClientConfig.getArchiveUrl() + ", metadataPrefix=" + harvestingClientConfig.getMetadataPrefix()); hdLogger.log(Level.INFO, "Datasets created/updated: " + harvestedDatasetIds.size() + ", datasets deleted: [TODO:], datasets failed: " + failedIdentifiers.size()); - // now index all studies (need to modify for update) + // now index all the datasets we have harvested - created, modified or deleted: /* (TODO: !!!) 
if (this.processedSizeThisBatch > 0) { hdLogger.log(Level.INFO, "POST HARVEST, reindexing the remaining studies."); @@ -239,14 +237,19 @@ public void doHarvest(Long dataverseId) throws IOException { //mailService.sendHarvestNotification(...getSystemEmail(), harvestingDataverse.getName(), logFileName, logTimestamp, harvestErrorOccurred.booleanValue(), harvestedDatasetIds.size(), failedIdentifiers); } catch (Throwable e) { harvestErrorOccurred.setValue(true); - String message = "Exception processing harvest, server= " + harvestingDataverseConfig.getArchiveUrl() + ",format=" + harvestingDataverseConfig.getMetadataPrefix() + " " + e.getClass().getName() + " " + e.getMessage(); + String message = "Exception processing harvest, server= " + harvestingClientConfig.getArchiveUrl() + ",format=" + harvestingClientConfig.getMetadataPrefix() + " " + e.getClass().getName() + " " + e.getMessage(); hdLogger.log(Level.SEVERE, message); logException(e, hdLogger); hdLogger.log(Level.INFO, "HARVEST NOT COMPLETED DUE TO UNEXPECTED ERROR."); - dataverseService.setHarvestFailure(harvestingDataverse.getId(), harvestStartTime); + // TODO: + // even though this harvesting run failed, we may have had successfully + // processed some number of datasets, by the time the exception was thrown. + // We should record that number too. And the number of the datasets that + // had failed, that we may have counted. -- L.A. 4.4 + harvestingClientService.setHarvestFailure(harvestingDataverse.getId(), new Date()); } finally { - dataverseService.setHarvestInProgress(harvestingDataverse.getId(), false); + harvestingClientService.resetHarvestInProgress(harvestingDataverse.getId()); fileHandler.close(); hdLogger.removeHandler(fileHandler); } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java index 98d9ac8b3cb..2d31d8067a3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java @@ -11,6 +11,7 @@ import java.util.Calendar; import java.util.Date; import java.util.GregorianCalendar; +import java.util.List; import javax.persistence.CascadeType; import javax.persistence.Column; import javax.persistence.Entity; @@ -19,7 +20,9 @@ import javax.persistence.Id; import javax.persistence.Index; import javax.persistence.JoinColumn; +import javax.persistence.OneToMany; import javax.persistence.OneToOne; +import javax.persistence.OrderBy; import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; @@ -172,14 +175,129 @@ public void setMetadataPrefix(String metadataPrefix) { this.metadataPrefix = metadataPrefix; } - /* move the fields below to the new HarvestingClientRun class: */ + // TODO: do we need "orphanRemoval=true"? -- L.A. 4.4 + // TODO: should it be @OrderBy("startTime")? -- L.A. 
4.4 + @OneToMany(mappedBy="harvestingClient", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) + @OrderBy("id") + private List harvestHistory; + + List getRunHistory() { + return harvestHistory; + } + + void setRunHistory(List harvestHistory) { + this.harvestHistory = harvestHistory; + } + + public String getLastResult() { + if (harvestHistory == null || harvestHistory.size() == 0) { + return null; + } + return harvestHistory.get(harvestHistory.size() - 1).getResultLabel(); + } + + public ClientHarvestRun getLastRun() { + if (harvestHistory == null || harvestHistory.size() == 0) { + return null; + } + + return harvestHistory.get(harvestHistory.size() - 1); + } + + public ClientHarvestRun getLastSuccessfulRun() { + if (harvestHistory == null || harvestHistory.size() == 0) { + return null; + } + + ClientHarvestRun harvestRun = null; + int i = harvestHistory.size() - 1; + + while (i > 0) { + if (harvestHistory.get(i).isSuccess()) { + return harvestHistory.get(i); + } + i--; + } + + return null; + } + + ClientHarvestRun getLastNonEmptyRun() { + if (harvestHistory == null || harvestHistory.size() == 0) { + return null; + } + + ClientHarvestRun harvestRun = null; + int i = harvestHistory.size() - 1; + + while (i > 0) { + if (harvestHistory.get(i).isSuccess()) { + if (harvestHistory.get(i).getHarvestedDatasetCount().longValue() > 0 || + harvestHistory.get(i).getDeletedDatasetCount().longValue() > 0) { + return harvestHistory.get(i); + } + } + i--; + } + return null; + } + + public Date getLastHarvestTime() { + ClientHarvestRun lastHarvest = getLastRun(); + if ( lastHarvest != null) { + return lastHarvest.getStartTime(); + } + return null; + } + + public Date getLastSuccessfulHarvestTime() { + ClientHarvestRun lastSuccessfulHarvest = getLastSuccessfulRun(); + if ( lastSuccessfulHarvest != null) { + return lastSuccessfulHarvest.getStartTime(); + } + return null; + } + + public Date getLastNonEmptyHarvestTime() { + ClientHarvestRun lastNonEmptyHarvest = getLastNonEmptyRun(); + if ( lastNonEmptyHarvest != null) { + return lastNonEmptyHarvest.getStartTime(); + } + return null; + } + + public Long getLastHarvestedDatasetCount() { + ClientHarvestRun lastNonEmptyHarvest = getLastNonEmptyRun(); + if ( lastNonEmptyHarvest != null) { + return lastNonEmptyHarvest.getHarvestedDatasetCount(); + } + return null; + } + + public Long getLastFailedDatasetCount() { + ClientHarvestRun lastNonEmptyHarvest = getLastNonEmptyRun(); + if ( lastNonEmptyHarvest != null) { + return lastNonEmptyHarvest.getFailedDatasetCount(); + } + return null; + } + + public Long getLastDeletedDatasetCount() { + ClientHarvestRun lastNonEmptyHarvest = getLastNonEmptyRun(); + if ( lastNonEmptyHarvest != null) { + return lastNonEmptyHarvest.getDeletedDatasetCount(); + } + return null; + } + + /* move the fields below to the new HarvestingClientRun class: private String harvestResult; - public String getHarvestResult() { + public String getResult() { return harvestResult; } - public void setHarvestResult(String harvestResult) { + public void setResult(String harvestResult) { this.harvestResult = harvestResult; } @@ -198,7 +316,7 @@ public void setLastHarvestTime(Date lastHarvestTime) { this.lastHarvestTime = lastHarvestTime; } - // This the last "successful harvest" - i.e., the last time we + // This is the last "successful harvest" - i.e., the last time we // tried to harvest, and got a response from the remote server. 
// We may not have necessarily harvested any useful content though;
 // the result may have been a "no content" or "no changes since the last harvest"
@@ -235,7 +353,7 @@ public void setLastNonEmptyHarvestTime(Date lastNonEmptyHarvestTime) {
 
     private Long failedDatasetCount;
     private Long deletedDatasetCount;
 
-    public Long getHarvestedDatasetCount() {
+    public Long getLastHarvestedDatasetCount() {
         return harvestedDatasetCount;
     }
 
@@ -243,7 +361,7 @@ public void setHarvestedDatasetCount(Long harvestedDatasetCount) {
         this.harvestedDatasetCount = harvestedDatasetCount;
     }
 
-    public Long getFailedDatasetCount() {
+    public Long getLastFailedDatasetCount() {
         return failedDatasetCount;
     }
 
@@ -251,14 +369,14 @@ public void setFailedDatasetCount(Long failedDatasetCount) {
         this.failedDatasetCount = failedDatasetCount;
     }
 
-    public Long getDeletedDatasetCount() {
+    public Long getLastDeletedDatasetCount() {
         return deletedDatasetCount;
     }
 
     public void setDeletedDatasetCount(Long deletedDatasetCount) {
         this.deletedDatasetCount = deletedDatasetCount;
     }
-    /**/
+    */
 
     private boolean scheduled;
 
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java
index 7783e64e8ff..df97463172e 100644
--- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java
@@ -1,11 +1,18 @@
 package edu.harvard.iq.dataverse.harvest.client;
 
+import edu.harvard.iq.dataverse.Dataverse;
 import edu.harvard.iq.dataverse.DataverseServiceBean;
+import java.util.ArrayList;
+import java.util.Date;
 import java.util.logging.Logger;
 import javax.ejb.EJB;
 import javax.ejb.Stateless;
+import javax.ejb.TransactionAttribute;
+import javax.ejb.TransactionAttributeType;
 import javax.faces.bean.ManagedBean;
 import javax.inject.Named;
+import javax.persistence.EntityManager;
+import javax.persistence.PersistenceContext;
 
 /**
  *
@@ -20,5 +27,110 @@ public class HarvestingClientServiceBean {
     @EJB
     DataverseServiceBean dataverseService;
 
-    private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.HarvestingClinetServiceBean");
+    @PersistenceContext(unitName = "VDCNet-ejbPU")
+    private EntityManager em;
+    
+    private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.HarvestingClientServiceBean");
+    
+    /* let's try and live without this method:
+    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+    public void setHarvestResult(Long hdId, String result) {
+        Dataverse hd = em.find(Dataverse.class, hdId);
+        em.refresh(hd); // ??
+        if (hd.isHarvested()) {
+            hd.getHarvestingClientConfig().setHarvestResult(result);
+        }
+    }
+    */
+    
+    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+    public void resetHarvestInProgress(Long hdId) {
+        Dataverse hd = em.find(Dataverse.class, hdId);
+        em.refresh(hd);
+        if (!hd.isHarvested()) {
+            return;
+        }
+        hd.getHarvestingClientConfig().setHarvestingNow(false);
+        
+        // And if there is an unfinished RunResult object, we'll
+        // just mark it as a failure:
+        if (hd.getHarvestingClientConfig().getLastRun() != null
+                && hd.getHarvestingClientConfig().getLastRun().isInProgress()) {
+            hd.getHarvestingClientConfig().getLastRun().setFailed();
+        }
+        
+    }
+    
+    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+    public void setHarvestInProgress(Long hdId, Date startTime) {
+        Dataverse hd = em.find(Dataverse.class, hdId);
+        em.refresh(hd);
+        HarvestingClient harvestingClient = hd.getHarvestingClientConfig();
+        if (harvestingClient == null) {
+            return;
+        }
+        // flag the client as actively harvesting:
+        harvestingClient.setHarvestingNow(true);
+        if (harvestingClient.getRunHistory() == null) {
+            harvestingClient.setRunHistory(new ArrayList<ClientHarvestRun>());
+        }
+        ClientHarvestRun currentRun = new ClientHarvestRun();
+        currentRun.setHarvestingClient(harvestingClient);
+        currentRun.setStartTime(startTime);
+        currentRun.setInProgress();
+        harvestingClient.getRunHistory().add(currentRun);
+    }
+    
+    /*
+    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+    public void setLastHarvestTime(Long hdId, Date lastHarvestTime) {
+        Dataverse hd = em.find(Dataverse.class, hdId);
+        em.refresh(hd);
+        if (hd.isHarvested()) {
+            hd.getHarvestingClientConfig().setLastHarvestTime(lastHarvestTime);
+        }
+    }*/
+    
+    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+    public void setHarvestSuccess(Long hdId, Date currentTime, int harvestedCount, int failedCount) {
+        Dataverse hd = em.find(Dataverse.class, hdId);
+        em.refresh(hd);
+        HarvestingClient harvestingClient = hd.getHarvestingClientConfig();
+        if (harvestingClient == null) {
+            return;
+        }
+        
+        ClientHarvestRun currentRun = harvestingClient.getLastRun();
+        
+        if (currentRun != null && currentRun.isInProgress()) {
+            // TODO: what if there's no current run in progress? should we just
+            // give up quietly, or should we make a noise of some kind? -- L.A. 4.4
+            
+            currentRun.setSuccess();
+            currentRun.setFinishTime(currentTime);
+            currentRun.setHarvestedDatasetCount(new Long(harvestedCount));
+            currentRun.setFailedDatasetCount(new Long(failedCount));
+            
+            /*TODO: still need to record the number of deleted datasets! */
+        }
+    }
+    
+    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
+    public void setHarvestFailure(Long hdId, Date currentTime) {
+        Dataverse hd = em.find(Dataverse.class, hdId);
+        em.refresh(hd);
+        HarvestingClient harvestingClient = hd.getHarvestingClientConfig();
+        if (harvestingClient == null) {
+            return;
+        }
+        
+        ClientHarvestRun currentRun = harvestingClient.getLastRun();
+        
+        if (currentRun != null && currentRun.isInProgress()) {
+            // TODO: what if there's no current run in progress? should we just
+            // give up quietly, or should we make a noise of some kind? -- L.A.
4.4 + + currentRun.setFailed(); + currentRun.setFinishTime(currentTime); + } + } } diff --git a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java index f438bd10c14..b24be18264a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java @@ -10,6 +10,7 @@ import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.harvest.client.HarvestTimerInfo; import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; +import edu.harvard.iq.dataverse.harvest.client.HarvestingClientServiceBean; import java.io.Serializable; import java.net.InetAddress; import java.net.UnknownHostException; @@ -43,6 +44,8 @@ public class DataverseTimerServiceBean implements Serializable { HarvesterServiceBean harvesterService; @EJB DataverseServiceBean dataverseService; + @EJB + HarvestingClientServiceBean harvestingClientService; /*@EJB StudyServiceLocal studyService;*/ @@ -80,11 +83,18 @@ public void handleTimeout(javax.ejb.Timer timer) { HarvestTimerInfo info = (HarvestTimerInfo) timer.getInfo(); try { - logger.log(Level.INFO, "DO HARVESTING of dataverse " + info.getHarvestingDataverseId()); + logger.log(Level.INFO, "running a harvester client configured for dataverse " + info.getHarvestingDataverseId()); harvesterService.doHarvest(info.getHarvestingDataverseId()); } catch (Throwable e) { - dataverseService.setHarvestResult(info.getHarvestingDataverseId(), harvesterService.HARVEST_RESULT_FAILED); + // Harvester Service should be handling any error notifications, + // if/when things go wrong. + // (TODO: -- verify this logic; harvesterService may still be able + // to throw an IOException, if it could not run the harvest at all, + // or could not for whatever reason modify the database record... + // in this case we should, probably, log the error and try to send + // a mail notification. -- L.A. 4.4) + //dataverseService.setHarvestResult(info.getHarvestingDataverseId(), harvesterService.HARVEST_RESULT_FAILED); //mailService.sendHarvestErrorNotification(dataverseService.find().getSystemEmail(), dataverseService.find().getName()); logException(e, logger); } From 54b5cea00420366ae521f2bfce8d6fbf9feb7a0b Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Fri, 29 Apr 2016 17:17:54 -0400 Subject: [PATCH 10/37] API further reorganized; proper Commands added for retrieving, creating, modifying and deleting Harvesting Clients. 
--- .../iq/dataverse/DataverseServiceBean.java | 4 +- .../harvard/iq/dataverse/api/Datasets.java | 2 +- .../harvard/iq/dataverse/api/Harvesting.java | 213 +++++++++++++++--- .../impl/CreateHarvestingClientCommand.java | 14 +- .../impl/DeleteHarvestingClientCommand.java | 11 +- .../impl/GetHarvestingClientCommand.java | 6 +- .../impl/UpdateHarvestingClientCommand.java | 17 +- .../harvest/client/HarvestingClient.java | 13 ++ .../client/HarvestingClientServiceBean.java | 34 ++- .../iq/dataverse/util/json/JsonParser.java | 17 +- 10 files changed, 260 insertions(+), 71 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 003df01f87b..8b2b438ac2a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -419,7 +419,7 @@ public List findDataversesThatLinkToThisDatasetId(long datasetId) { */ public Map getAllHarvestedDataverseDescriptions(){ - String qstr = "SELECT dataverse_id, archiveDescription FROM harvestingDataverseConfig;"; + String qstr = "SELECT dataverse_id, archiveDescription FROM harvestingClient;"; List searchResults = null; try { @@ -454,7 +454,7 @@ public Map getAllHarvestedDataverseDescriptions(){ } public List getAllHarvestedDataverses() { - return em.createQuery("SELECT object(d) FROM Dataverse d, harvestingDataverseConfig c AS d WHERE c.dataverse_id IS NOT null AND c.dataverse_id=d.id order by d.id").getResultList(); + return em.createQuery("SELECT object(d) FROM Dataverse d, harvestingClient c AS d WHERE c.dataverse_id IS NOT null AND c.dataverse_id=d.id order by d.id").getResultList(); } public void populateDvSearchCard(SolrSearchResult solrSearchResult) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index e8e77d7dc1f..98d0af8bde3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -315,7 +315,7 @@ public Response updateDraftVersion( String jsonBody, @PathParam("id") String id, boolean updateDraft = ds.getLatestVersion().isDraft(); DatasetVersion managedVersion = execCommand( updateDraft ? 
new UpdateDatasetVersionCommand(req, incomingVersion)
-                : new CreateDatasetVersionCommand(req, ds, incomingVersion));
+                  : new CreateDatasetVersionCommand(req, ds, incomingVersion));
         return okResponse( json(managedVersion) );
 
     } catch (JsonParseException ex) {
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java b/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java
index 444bed6e390..8dcec82b644 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java
@@ -5,25 +5,33 @@
 import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
 import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
+import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
+import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestingClientCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.GetHarvestingClientCommand;
+import edu.harvard.iq.dataverse.engine.command.impl.UpdateHarvestingClientCommand;
 import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean;
+import edu.harvard.iq.dataverse.harvest.client.HarvestingClientServiceBean;
+import edu.harvard.iq.dataverse.util.json.JsonParseException;
 import javax.json.JsonObjectBuilder;
 import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder;
-import edu.harvard.iq.dataverse.util.json.JsonPrinter;
 import java.io.IOException;
-import java.math.BigDecimal;
+import java.io.StringReader;
 import java.util.List;
 import javax.ejb.EJB;
 import javax.ejb.Stateless;
 import javax.json.Json;
 import javax.json.JsonArrayBuilder;
+import javax.json.JsonObject;
 import javax.ws.rs.GET;
+import javax.ws.rs.POST;
+import javax.ws.rs.PUT;
 import javax.ws.rs.Path;
 import javax.ws.rs.PathParam;
 import javax.ws.rs.QueryParam;
 import javax.ws.rs.core.Response;
 
 @Stateless
-@Path("harvest")
+@Path("harvest/client")
 public class Harvesting extends AbstractApiBean {
 
 
@@ -31,10 +39,161 @@ public class Harvesting extends AbstractApiBean {
 DataverseServiceBean dataverseService;
 @EJB
 HarvesterServiceBean harvesterService;
+    @EJB
+    HarvestingClientServiceBean harvestingClientService;
 
+    /*
+     *  /api/harvest/client
+     *  and
+     *  /api/harvest/client/{nickname}
+     *  will, by default, return a JSON record with the information about the
+     *  configured remote archives.
+     *  Optionally, plain text output may be provided as well.
+     */
 @GET
-    @Path("run/{dataverseAlias}")
+    @Path("")
+    public Response harvestingClients(@QueryParam("key") String apiKey) throws IOException {
+        
+        List<HarvestingClient> harvestingClients = null;
+        try {
+            harvestingClients = harvestingClientService.getAllHarvestingClients();
+        } catch (Exception ex) {
+            return errorResponse( Response.Status.BAD_REQUEST, "Caught an exception looking up configured harvesting clients; " + ex.getMessage() );
+        }
+        
+        if (harvestingClients == null) {
+            // returning an empty list:
+            return okResponse(jsonObjectBuilder().add("harvestingClients",""));
+        }
+        
+        JsonArrayBuilder hcArr = Json.createArrayBuilder();
+        
+        for (HarvestingClient harvestingClient : harvestingClients) {
+            // We already have this harvestingClient - why do we need to
+            // execute this "Get Harvesting Client Command" in order to get it,
+            // again? - the purpose of the command is to run the request through
+            // the Authorization system, to verify that they actually have
+            // the permission to view this harvesting client config. -- L.A.
4.4 + HarvestingClient retrievedHarvestingClient = null; + try { + DataverseRequest req = createDataverseRequest(findUserOrDie()); + retrievedHarvestingClient = execCommand( new GetHarvestingClientCommand(req, harvestingClient)); + } catch (Exception ex) { + // Don't do anything. + // We'll just skip this one - since this means the user isn't + // authorized to view this client configuration. + } + + if (retrievedHarvestingClient != null) { + hcArr.add(harvestingConfigAsJson(retrievedHarvestingClient)); + } + } + + return okResponse(jsonObjectBuilder().add("harvestingClients", hcArr)); + } + + @GET + @Path("{nickName}") + public Response harvestingClient(@PathParam("nickName") String nickName, @QueryParam("key") String apiKey) throws IOException { + try { + HarvestingClient harvestingClient = harvestingClientService.findByNickname(nickName); + if (harvestingClient == null) { + return errorResponse( Response.Status.NOT_FOUND, "Harvesting client " + nickName + " not found."); + } + DataverseRequest req = createDataverseRequest(findUserOrDie()); + return okResponse(harvestingConfigAsJson(execCommand( new GetHarvestingClientCommand(req, harvestingClient)))); + + } catch (Exception ex) { + return errorResponse( Response.Status.BAD_REQUEST, "Caught an exception looking up harvesting client " + nickName + "; " + ex.getMessage() ); + } + } + + @POST + @Path("{nickName}") + public Response createHarvestingClient(String jsonBody, @PathParam("nickName") String nickName, @QueryParam("key") String apiKey) throws IOException, JsonParseException { + + try ( StringReader rdr = new StringReader(jsonBody) ) { + JsonObject json = Json.createReader(rdr).readObject(); + + HarvestingClient harvestingClient = new HarvestingClient(); + // TODO: check that it doesn't exist yet... + harvestingClient.setName(nickName); + String dataverseAlias = jsonParser().parseHarvestingClient(json, harvestingClient); + Dataverse ownerDataverse = dataverseService.findByAlias(dataverseAlias); + + if (ownerDataverse == null) { + return errorResponse(Response.Status.BAD_REQUEST, "No such dataverse: " + dataverseAlias); + } + + harvestingClient.setDataverse(ownerDataverse); + ownerDataverse.setHarvestingClientConfig(harvestingClient); + + DataverseRequest req = createDataverseRequest(findUserOrDie()); + HarvestingClient managedHarvestingClient = execCommand( new CreateHarvestingClientCommand(req, harvestingClient)); + return createdResponse( "/datasets/" + nickName, harvestingConfigAsJson(managedHarvestingClient)); + + } catch (JsonParseException ex) { + return errorResponse( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() ); + + } catch (WrappedResponse ex) { + return ex.getResponse(); + + } + + } + + @PUT + @Path("{nickName}") + public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") String nickName, @QueryParam("key") String apiKey) throws IOException, JsonParseException { + HarvestingClient harvestingClient = null; + try { + harvestingClient = harvestingClientService.findByNickname(nickName); + } catch (Exception ex) { + // We don't care what happened; we'll just assume we couldn't find it. 
+ harvestingClient = null; + } + + if (harvestingClient == null) { + return errorResponse( Response.Status.NOT_FOUND, "Harvesting client " + nickName + " not found."); + } + + String ownerDataverseAlias = harvestingClient.getDataverse().getAlias(); + + try ( StringReader rdr = new StringReader(jsonBody) ) { + DataverseRequest req = createDataverseRequest(findUserOrDie()); + JsonObject json = Json.createReader(rdr).readObject(); + + String newDataverseAlias = jsonParser().parseHarvestingClient(json, harvestingClient); + + if (newDataverseAlias != null + && !newDataverseAlias.equals("") + && !newDataverseAlias.equals(ownerDataverseAlias)) { + return errorResponse(Response.Status.BAD_REQUEST, "Bad \"dataverseAlias\" supplied. Harvesting client "+nickName+" belongs to the dataverse "+ownerDataverseAlias); + } + HarvestingClient managedHarvestingClient = execCommand( new UpdateHarvestingClientCommand(req, harvestingClient)); + return createdResponse( "/datasets/" + nickName, harvestingConfigAsJson(managedHarvestingClient)); + + } catch (JsonParseException ex) { + return errorResponse( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() ); + + } catch (WrappedResponse ex) { + return ex.getResponse(); + + } + + } + + // TODO: + // add a @DELETE method + // (there is already a DeleteHarvestingClient command) + + // Methods for managing harvesting runs (jobs): + + + // This POST starts a new harvesting run: + @POST + @Path("{nickName}/run") public Response startHarvestingJob(@PathParam("dataverseAlias") String dataverseAlias, @QueryParam("key") String apiKey) throws IOException { try { @@ -70,44 +229,40 @@ public Response startHarvestingJob(@PathParam("dataverseAlias") String dataverse return this.accepted(); } - /* - * /api/harvest/status - * will, by default, return a JSON record with the information about the - * configured remote archives. - * optionally, plain text output will [/may] be provided as well. - */ - @GET - @Path("status") - public Response harvestingStatus() throws IOException { - //return this.accepted(); - - List harvestingDataverses = dataverseService.getAllHarvestedDataverses(); - if (harvestingDataverses == null) { - return okResponse(""); - } - - return okResponse(jsonObjectBuilder().add("remoteArchives", harvestingConfigsAsJsonArray(harvestingDataverses))); - } - + // This GET shows the status of the harvesting run in progress for this + // client, if present: + // @GET + // @Path("{nickName}/run") + // TODO: + + // This DELETE kills the harvesting run in progress for this client, + // if present: + // @DELETE + // @Path("{nickName}/run") + // TODO: + + /* Auxiliary, helper methods: */ + public static JsonArrayBuilder harvestingConfigsAsJsonArray(List harvestingDataverses) { JsonArrayBuilder hdArr = Json.createArrayBuilder(); for (Dataverse hd : harvestingDataverses) { - hdArr.add(harvestingConfigAsJson(hd)); + hdArr.add(harvestingConfigAsJson(hd.getHarvestingClientConfig())); } return hdArr; } - public static JsonObjectBuilder harvestingConfigAsJson(Dataverse dataverse) { - HarvestingClient harvestingConfig = dataverse.getHarvestingClientConfig(); + public static JsonObjectBuilder harvestingConfigAsJson(HarvestingClient harvestingConfig) { if (harvestingConfig == null) { return null; } - return jsonObjectBuilder().add("nickname", harvestingConfig.getName()). - add("dataverseAlias", dataverse.getAlias()). + return jsonObjectBuilder().add("nickName", harvestingConfig.getName()). 
+ add("dataverseAlias", harvestingConfig.getDataverse().getAlias()). add("type", harvestingConfig.getHarvestType()). - add("harvestURL", harvestingConfig.getHarvestingUrl()). + add("harvestUrl", harvestingConfig.getHarvestingUrl()). + add("archiveUrl", harvestingConfig.getArchiveUrl()). + add("archiveDescription",harvestingConfig.getArchiveDescription()). add("metadataFormat", harvestingConfig.getMetadataPrefix()). add("set", harvestingConfig.getHarvestingSet() == null ? "N/A" : harvestingConfig.getHarvestingSet()). add("schedule", harvestingConfig.isScheduled() ? harvestingConfig.getScheduleDescription() : "none"). diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateHarvestingClientCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateHarvestingClientCommand.java index c4d09b2ef6e..6a5f0d31037 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateHarvestingClientCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateHarvestingClientCommand.java @@ -17,17 +17,21 @@ public class CreateHarvestingClientCommand extends AbstractCommand { private final Dataverse dv; + private final HarvestingClient harvestingClient; - public CreateHarvestingClientCommand(DataverseRequest aRequest, Dataverse motherDataverse) { - super(aRequest, motherDataverse); - dv = motherDataverse; + public CreateHarvestingClientCommand(DataverseRequest aRequest, HarvestingClient harvestingClient) { + super(aRequest, harvestingClient.getDataverse()); + this.harvestingClient = harvestingClient; + dv = harvestingClient.getDataverse(); } @Override public HarvestingClient execute(CommandContext ctxt) throws CommandException { // TODO: check if the harvesting client config is legit; - // and that it is new. - return ctxt.dataverses().save(dv).getHarvestingClientConfig(); + // and that it is indeed new and unique? + // (may not be necessary - as the uniqueness should be enforced by + // the persistence layer... -- L.A. 
4.4) + return ctxt.em().merge(this.harvestingClient); } } \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteHarvestingClientCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteHarvestingClientCommand.java index 38999fb45c8..01a78deec4a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteHarvestingClientCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteHarvestingClientCommand.java @@ -18,17 +18,18 @@ public class DeleteHarvestingClientCommand extends AbstractVoidCommand { private final Dataverse motherDataverse; + private final HarvestingClient harvestingClient; - public DeleteHarvestingClientCommand(DataverseRequest aRequest, Dataverse motherDataverse) { - super(aRequest, motherDataverse); - this.motherDataverse = motherDataverse; + public DeleteHarvestingClientCommand(DataverseRequest aRequest, HarvestingClient harvestingClient) { + super(aRequest, harvestingClient.getDataverse()); + this.motherDataverse = harvestingClient.getDataverse(); + this.harvestingClient = harvestingClient; } @Override public void executeImpl(CommandContext ctxt) throws CommandException { - HarvestingClient harvestingClient = motherDataverse.getHarvestingClientConfig(); if (harvestingClient == null) { - throw new IllegalCommandException("No harvesting client is configured for dataverse "+motherDataverse.getAlias(), this); + throw new IllegalCommandException("DeleteHarvestingClientCommand: attempted to execute with null harvesting client; dataverse: "+motherDataverse.getAlias(), this); } motherDataverse.setHarvestingClientConfig(null); ctxt.em().remove(harvestingClient); diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetHarvestingClientCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetHarvestingClientCommand.java index d0929e2ec1a..d3e253b59ff 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetHarvestingClientCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetHarvestingClientCommand.java @@ -26,9 +26,9 @@ public class GetHarvestingClientCommand extends AbstractCommand{ private final Dataverse ownerDataverse; - public GetHarvestingClientCommand(DataverseRequest aRequest, Dataverse ownerDataverse) { - super(aRequest, ownerDataverse); - this.ownerDataverse = ownerDataverse; + public GetHarvestingClientCommand(DataverseRequest aRequest, HarvestingClient harvestingClient) { + super(aRequest, harvestingClient.getDataverse()); + this.ownerDataverse = harvestingClient.getDataverse(); } @Override diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestingClientCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestingClientCommand.java index 77c8bf75e57..0e699de40e3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestingClientCommand.java +++ b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestingClientCommand.java @@ -17,17 +17,20 @@ public class UpdateHarvestingClientCommand extends AbstractCommand { private final Dataverse dv; - - public UpdateHarvestingClientCommand(DataverseRequest aRequest, Dataverse motherDataverse) { - super(aRequest, motherDataverse); - dv = motherDataverse; + private final HarvestingClient harvestingClient; + + public UpdateHarvestingClientCommand(DataverseRequest aRequest, HarvestingClient harvestingClient) { + super(aRequest, 
harvestingClient.getDataverse()); + this.harvestingClient = harvestingClient; + dv = harvestingClient.getDataverse(); } @Override public HarvestingClient execute(CommandContext ctxt) throws CommandException { - // TODO: check if the harvesting client config attached to the dataverse - // is legit; and that it already exists. - return ctxt.em().merge(dv).getHarvestingClientConfig(); + // TODO: check that the harvesting client config is attached to a legit + // dataverse; and that we are in fact modifying a config that already + // exists. -- L.A. 4.4 + return ctxt.em().merge(this.harvestingClient); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java index 2d31d8067a3..a5923ff9d3d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java @@ -20,12 +20,17 @@ import javax.persistence.Id; import javax.persistence.Index; import javax.persistence.JoinColumn; +import javax.persistence.NamedQueries; +import javax.persistence.NamedQuery; import javax.persistence.OneToMany; import javax.persistence.OneToOne; import javax.persistence.OrderBy; import javax.persistence.Table; import javax.persistence.Temporal; import javax.persistence.TemporalType; +import javax.validation.constraints.Pattern; +import javax.validation.constraints.Size; +import org.hibernate.validator.constraints.NotBlank; /** * @@ -37,6 +42,9 @@ , @Index(columnList="harveststyle") , @Index(columnList="harvestingurl")}) @Entity +@NamedQueries({ + @NamedQuery(name = "HarvestingClient.findByNickname", query="SELECT hc FROM HarvestingClient hc WHERE LOWER(hc.name)=:nickName") +}) public class HarvestingClient implements Serializable { private static final long serialVersionUID = 1L; @@ -89,6 +97,11 @@ public void setDataverse(Dataverse dataverse) { this.dataverse = dataverse; } + @NotBlank(message = "Please enter a nickname.") + @Column(nullable = false, unique=true) + @Size(max = 30, message = "Nickname must be at most 30 characters.") + @Pattern.List({@Pattern(regexp = "[a-zA-Z0-9\\_\\-]*", message = "Found an illegal character(s). 
Valid characters are a-Z, 0-9, '_', and '-'."),
+            @Pattern(regexp=".*\\D.*", message="Nickname should not be a number")})
     private String name;
 
     public String getName() {
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java
index df97463172e..6a905ba7033 100644
--- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java
@@ -4,6 +4,7 @@
 import edu.harvard.iq.dataverse.DataverseServiceBean;
 import java.util.ArrayList;
 import java.util.Date;
+import java.util.List;
 import java.util.logging.Logger;
 import javax.ejb.EJB;
 import javax.ejb.Stateless;
@@ -12,6 +13,8 @@
 import javax.faces.bean.ManagedBean;
 import javax.inject.Named;
 import javax.persistence.EntityManager;
+import javax.persistence.NoResultException;
+import javax.persistence.NonUniqueResultException;
 import javax.persistence.PersistenceContext;
 
 /**
@@ -31,17 +34,21 @@ public class HarvestingClientServiceBean {
     private EntityManager em;
 
     private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.HarvestingClientServiceBean");
-    
-    /* let's try and live without this method:
-    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
-    public void setHarvestResult(Long hdId, String result) {
-        Dataverse hd = em.find(Dataverse.class, hdId);
-        em.refresh(hd); // ??
-        if (hd.isHarvested()) {
-            hd.getHarvestingClientConfig().setHarvestResult(result);
+
+    public HarvestingClient findByNickname(String nickName) {
+        try {
+            return em.createNamedQuery("HarvestingClient.findByNickname", HarvestingClient.class)
+                .setParameter("nickName", nickName.toLowerCase())
+                .getSingleResult();
+        } catch ( NoResultException|NonUniqueResultException ex ) {
+            logger.fine("Unable to find a single harvesting client by nickname \"" + nickName + "\": " + ex);
+            return null;
         }
     }
-    */
+
+    public List getAllHarvestingClients() {
+        return em.createQuery("SELECT object(c) FROM Dataverse d, harvestingClient c AS d WHERE c.dataverse_id IS NOT null AND c.dataverse_id=d.id order by d.id").getResultList();
+    }
edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddressRange;
+import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
 import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
 import java.io.StringReader;
 import java.text.ParseException;
@@ -540,5 +541,19 @@ Long parseLong(String str) throws NumberFormatException {
     int parsePrimitiveInt(String str, int defaultValue) {
         return str == null ? defaultValue : Integer.parseInt(str);
     }
-    
+    
+    public String parseHarvestingClient(JsonObject obj, HarvestingClient harvestingClient) throws JsonParseException {
+        
+        String dataverseAlias = obj.getString("dataverseAlias",null);
+        
+        harvestingClient.setName(obj.getString("nickName",null));
+        harvestingClient.setHarvestType(obj.getString("type",null));
+        harvestingClient.setHarvestingUrl(obj.getString("harvestUrl",null));
+        harvestingClient.setArchiveUrl(obj.getString("archiveUrl",null));
+        harvestingClient.setArchiveDescription(obj.getString("archiveDescription",null));
+        harvestingClient.setMetadataPrefix(obj.getString("metadataFormat",null));
+        harvestingClient.setHarvestingSet(obj.getString("set",null));
+        
+        return dataverseAlias;
+    }
 }

From c4b12b1311872b235a392d31ad215a62d250e6f8 Mon Sep 17 00:00:00 2001
From: Leonid Andreev
Date: Sun, 1 May 2016 23:53:40 -0400
Subject: [PATCH 11/37] Bug fixes; Improved diagnostics in the Harvesting API;

---
 .../iq/dataverse/DataverseServiceBean.java    |  2 +-
 .../harvard/iq/dataverse/api/Harvesting.java  | 48 +++++++++++++++----
 .../client/HarvestingClientServiceBean.java   | 11 +++--
 3 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
index 8b2b438ac2a..34cb48e9f68 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java
@@ -454,7 +454,7 @@ public Map getAllHarvestedDataverseDescriptions(){
     }
 
     public List getAllHarvestedDataverses() {
-        return em.createQuery("SELECT object(d) FROM Dataverse d, harvestingClient c AS d WHERE c.dataverse_id IS NOT null AND c.dataverse_id=d.id order by d.id").getResultList();
+        return em.createQuery("SELECT object(d) FROM Dataverse d, harvestingClient c AS d WHERE c.dataverse.id=d.id order by d.id").getResultList();
     }
 
     public void populateDvSearchCard(SolrSearchResult solrSearchResult) {
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java b/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java
index 8dcec82b644..9734cbd51eb 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java
@@ -17,6 +17,7 @@
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.List;
+import java.util.logging.Logger;
 import javax.ejb.EJB;
 import javax.ejb.Stateless;
 import javax.json.Json;
@@ -42,7 +43,7 @@ public class Harvesting extends AbstractApiBean {
     @EJB
     HarvestingClientServiceBean harvestingClientService;
 
-    
+    private static final Logger logger = Logger.getLogger(Harvesting.class.getName());
     /*
      *  /api/harvest/client
      *  and
      *  /api/harvest/client/{nickname}
      *  will, by default, return a JSON record with the information about the
      *  configured remote archives.
      *  Optionally, plain text output may be provided as well.
      */
     @GET
     @Path("")
     public Response harvestingClients(@QueryParam("key") String apiKey) throws IOException {
         try {
             harvestingClients = harvestingClientService.getAllHarvestingClients();
         } catch (Exception ex) {
-            return errorResponse( Response.Status.BAD_REQUEST, "Caught an exception looking up configured harvesting clients; " + ex.getMessage() );
+            return errorResponse( 
Response.Status.INTERNAL_SERVER_ERROR, "Caught an exception looking up configured harvesting clients; " + ex.getMessage() ); } if (harvestingClients == null) { @@ -96,16 +97,45 @@ public Response harvestingClients(@QueryParam("key") String apiKey) throws IOExc @GET @Path("{nickName}") public Response harvestingClient(@PathParam("nickName") String nickName, @QueryParam("key") String apiKey) throws IOException { + + HarvestingClient harvestingClient = null; try { - HarvestingClient harvestingClient = harvestingClientService.findByNickname(nickName); - if (harvestingClient == null) { - return errorResponse( Response.Status.NOT_FOUND, "Harvesting client " + nickName + " not found."); - } - DataverseRequest req = createDataverseRequest(findUserOrDie()); - return okResponse(harvestingConfigAsJson(execCommand( new GetHarvestingClientCommand(req, harvestingClient)))); + harvestingClient = harvestingClientService.findByNickname(nickName); + } catch (Exception ex) { + logger.warning("Exception caught looking up harvesting client " + nickName + ": " + ex.getMessage()); + return errorResponse( Response.Status.BAD_REQUEST, "Internal error: failed to look up harvesting client " + nickName + "."); + } + + if (harvestingClient == null) { + return errorResponse(Response.Status.NOT_FOUND, "Harvesting client " + nickName + " not found."); + } + + HarvestingClient retrievedHarvestingClient = null; + + try { + // findUserOrDie() and execCommand() both throw WrappedResponse + // exception, that already has a proper HTTP response in it. + retrievedHarvestingClient = execCommand(new GetHarvestingClientCommand(createDataverseRequest(findUserOrDie()), harvestingClient)); + logger.info("retrieved Harvesting Client " + retrievedHarvestingClient.getName() + " with the GetHarvestingClient command."); + } catch (WrappedResponse wr) { + return wr.getResponse(); + } catch (Exception ex) { + logger.warning("Unknown exception caught while executing GetHarvestingClientCommand: "+ex.getMessage()); + retrievedHarvestingClient = null; + } + + if (retrievedHarvestingClient == null) { + return errorResponse( Response.Status.BAD_REQUEST, + "Internal error: failed to retrieve harvesting client " + nickName + "."); + } + + try { + return okResponse(harvestingConfigAsJson(retrievedHarvestingClient)); } catch (Exception ex) { - return errorResponse( Response.Status.BAD_REQUEST, "Caught an exception looking up harvesting client " + nickName + "; " + ex.getMessage() ); + logger.warning("Unknown exception caught while trying to format harvesting client config as json: "+ex.getMessage()); + return errorResponse( Response.Status.BAD_REQUEST, + "Internal error: failed to produce output for harvesting client " + nickName + "."); } } diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java index 6a905ba7033..853c511cd72 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java @@ -23,9 +23,9 @@ * * Dedicated service for managing Harvesting Client Configurations */ -@Stateless(name = "harvesterService") +@Stateless @Named -@ManagedBean +//@ManagedBean public class HarvestingClientServiceBean { @EJB DataverseServiceBean dataverseService; @@ -47,7 +47,12 @@ public HarvestingClient findByNickname(String nickName) { } public List getAllHarvestingClients() { - return 
em.createQuery("SELECT object(c) FROM Dataverse d, harvestingClient c AS d WHERE c.dataverse_id IS NOT null AND c.dataverse_id=d.id order by d.id").getResultList(); + try { + return em.createQuery("SELECT object(c) FROM HarvestingClient AS c ORDER BY c.id").getResultList(); + } catch (Exception ex) { + logger.warning("Unknown exception caught while looking up configured Harvesting Clients: "+ex.getMessage()); + } + return null; } @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW) From 58ab1cfe8b9d3f9cf4b3d0f07cffca67b373e6a9 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 4 May 2016 16:31:18 -0400 Subject: [PATCH 12/37] Updated Bundle for Dataset Publish popups --- src/main/java/Bundle.properties | 6 +++--- src/main/webapp/dataset.xhtml | 28 ++++++++++++++++++++-------- 2 files changed, 23 insertions(+), 11 deletions(-) diff --git a/src/main/java/Bundle.properties b/src/main/java/Bundle.properties index 5a6d571d06e..584cc9eb1ae 100755 --- a/src/main/java/Bundle.properties +++ b/src/main/java/Bundle.properties @@ -775,9 +775,9 @@ dataset.majorRelease=Major Release dataset.minorRelease=Minor Release dataset.majorRelease.tip=Due to the nature of changes to the current draft this will be a major release ({0}) dataset.mayNotBePublished=Cannot publish dataset. -dataset.mayNotPublish.administrator= This dataset cannot be published until {1} is published by its administrator. -dataset.mayNotPublish.both= This dataset cannot be published until {1} is published. Would you like to publish both right now? -dataset.mayNotPublish.twoGenerations= This dataset cannot be published until {1} and {3} are published. +dataset.mayNotPublish.administrator= This dataset cannot be published until {0} is published by its administrator. +dataset.mayNotPublish.both= This dataset cannot be published until {0} is published. Would you like to publish both right now? +dataset.mayNotPublish.twoGenerations= This dataset cannot be published until {0} and {1} are published. dataset.mayNotBePublished.both.button=Yes, Publish Both dataset.viewVersion.unpublished=View Unpublished Version dataset.viewVersion.published=View Published Version diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 0642a52fe21..6f8dd18d663 100755 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -1316,8 +1316,11 @@

[dataset.xhtml hunk: the XHTML markup was lost in extraction; only the +/- line markers survived]
@@ -1364,8 +1367,11 @@
[dataset.xhtml hunk: markup lost in extraction]
@@ -1377,10 +1383,16 @@
[dataset.xhtml hunk: markup lost in extraction]

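The Bundle.properties hunk in this patch re-indexes the publish-popup placeholders from {1}/{3} to {0}/{1}, so they line up with the argument list the page actually supplies. A minimal sketch of why the old indices misbehaved, assuming the strings are rendered through java.text.MessageFormat with exactly two arguments (the demo class and argument values below are invented for illustration):

import java.text.MessageFormat;

public class PlaceholderIndexDemo {
    public static void main(String[] args) {
        Object[] names = {"Root Dataverse", "Sub Dataverse"}; // illustrative arguments

        // Old pattern: {1} resolves to the *second* argument, and {3} is out of
        // range for a two-element array, so MessageFormat emits the literal "{3}".
        System.out.println(MessageFormat.format(
                "This dataset cannot be published until {1} and {3} are published.", names));

        // Re-indexed pattern: {0} and {1} match the two arguments one-for-one.
        System.out.println(MessageFormat.format(
                "This dataset cannot be published until {0} and {1} are published.", names));
    }
}

Under that assumption, the old keys rendered correctly only if callers padded the argument array out to four elements; the 0-based rewrite removes that coupling.
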
From 3ccd3a0019811cbcea3f25aa2469ecaec7b34010 Mon Sep 17 00:00:00 2001
From: Leonid Andreev
Date: Thu, 5 May 2016 17:42:17 -0400
Subject: [PATCH 13/37] experimental oai harvesting implementation based on
 lyncode XOAI.

---
 pom.xml                                       |  14 +-
 .../harvest/client/HarvesterServiceBean.java  | 388 +++++-------------
 2 files changed, 119 insertions(+), 283 deletions(-)

diff --git a/pom.xml b/pom.xml
index 2fbef369667..862d8fffe48 100644
--- a/pom.xml
+++ b/pom.xml
@@ -378,7 +378,19 @@
             <artifactId>oclc-harvester2</artifactId>
             <version>0.1.12</version>
         </dependency>
-
+
+        <dependency>
+            <groupId>com.lyncode</groupId>
+            <artifactId>xoai-common</artifactId>
+            <version>4.1.0</version>
+        </dependency>
+
+        <dependency>
+            <groupId>com.lyncode</groupId>
+            <artifactId>xoai-service-provider</artifactId>
+            <version>4.1.0</version>
+        </dependency>
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java
index 24cf96fbd65..63cd2e380ec 100644
--- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java
@@ -19,6 +19,7 @@
 import java.util.Date;
 import java.util.Iterator;
 import java.util.List;
+import java.net.URLEncoder;
 import java.util.logging.FileHandler;
 import java.util.logging.Level;
 import java.util.logging.Logger;
@@ -38,6 +39,14 @@
 import org.apache.commons.lang.mutable.MutableBoolean;
 import org.xml.sax.SAXException;
 
+import com.lyncode.xoai.model.oaipmh.Granularity;
+import com.lyncode.xoai.model.oaipmh.Header;
+import com.lyncode.xoai.serviceprovider.ServiceProvider;
+import com.lyncode.xoai.serviceprovider.model.Context;
+import com.lyncode.xoai.serviceprovider.client.HttpOAIClient;
+import com.lyncode.xoai.serviceprovider.exceptions.BadArgumentException;
+import com.lyncode.xoai.serviceprovider.parameters.ListIdentifiersParameters;
+
 /**
  *
  * @author Leonid Andreev
@@ -64,9 +73,7 @@ public class HarvesterServiceBean {
 
     public static final String HARVEST_RESULT_SUCCESS="success";
     public static final String HARVEST_RESULT_FAILED="failed";
-    
-    private long processedSizeThisBatch = 0;
-    private List harvestedDatasetIdsThisBatch = null;
+    
     public HarvesterServiceBean() {
     }
@@ -183,8 +190,7 @@ public void doHarvest(Long dataverseId) throws IOException {
     hdLogger.addHandler(fileHandler);
 
     List harvestedDatasetIds = null;
-        this.processedSizeThisBatch = 0;
-        this.harvestedDatasetIdsThisBatch = new ArrayList();
+        List harvestedDatasetIdsThisBatch = new ArrayList();
     List failedIdentifiers = new ArrayList();
     Date harvestStartTime = new Date();
 
@@ -198,19 +204,11 @@ public void doHarvest(Long dataverseId) throws IOException {
     } else {
         harvestingClientService.resetHarvestInProgress(harvestingDataverse.getId());
-                String until = null;  // If we don't set until date, we will get all the changes since the last harvest.
-                String from = null;
-                // TODO: should it be last *non-empty* time? -- L.A.
4.4 - Date lastSuccessfulHarvestTime = harvestingClientConfig.getLastSuccessfulHarvestTime(); - if (lastSuccessfulHarvestTime != null) { - from = formatter.format(lastSuccessfulHarvestTime); - } harvestingClientService.setHarvestInProgress(harvestingDataverse.getId(), harvestStartTime); - hdLogger.log(Level.INFO, "BEGIN HARVEST..., oaiUrl=" + harvestingClientConfig.getArchiveUrl() + ",set=" + harvestingClientConfig.getHarvestingSet() + ", metadataPrefix=" + harvestingClientConfig.getMetadataPrefix() + ", from=" + from + ", until=" + until); - + if (harvestingClientConfig.isOai()) { - harvestedDatasetIds = harvestOAI(harvestingDataverse, hdLogger, from, until, harvestErrorOccurred, failedIdentifiers); + harvestedDatasetIds = harvestOAI(harvestingClientConfig, hdLogger, harvestErrorOccurred, failedIdentifiers, harvestedDatasetIdsThisBatch); } else { throw new IOException("Unsupported harvest type"); @@ -257,140 +255,121 @@ public void doHarvest(Long dataverseId) throws IOException { /** * - * @param dataverse the dataverse to harvest into - * @param from get updated studies from this beginning date - * @param until get updated studies until this end date + * @param harvestingClient the harvesting client object + * @param hdLogger custom logger (specific to this harvesting run) * @param harvestErrorOccurred have we encountered any errors during harvest? * @param failedIdentifiers Study Identifiers for failed "GetRecord" requests */ - private List harvestOAI(Dataverse dataverse, Logger hdLogger, String from, String until, MutableBoolean harvestErrorOccurred, List failedIdentifiers) - throws IOException, ParserConfigurationException,SAXException, TransformerException { - + private List harvestOAI(HarvestingClient harvestingClient, Logger hdLogger, MutableBoolean harvestErrorOccurred, List failedIdentifiers, List harvestedDatasetIdsThisBatch) + throws IOException, ParserConfigurationException, SAXException, TransformerException { + List harvestedDatasetIds = new ArrayList(); - - /* - ResumptionTokenType resumptionToken = null; - - do { - //resumptionToken = harvesterService.harvestFromIdentifiers(hdLogger, resumptionToken, dataverse, from, until, harvestedDatasetIds, failedIdentifiers, harvestErrorOccurred - resumptionToken = harvestFromIdentifiers(hdLogger, resumptionToken, dataverse, from, until, harvestedDatasetIds, failedIdentifiers, harvestErrorOccurred); - } while (resumptionToken != null && !resumptionToken.equals("")); - - hdLogger.log(Level.INFO, "COMPLETED HARVEST, oaiUrl=" + dataverse.getServerUrl() + ",set=" + dataverse.getHarvestingSet() + ", metadataPrefix=" + dataverse.getHarvestFormatType().getMetadataPrefix() + ", from=" + from + ", until=" + until); - - */ - return harvestedDatasetIds; - - } + Long processedSizeThisBatch = 0L; - /* - @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) - public ResumptionTokenType harvestFromIdentifiers(Logger hdLogger, ResumptionTokenType resumptionToken, HarvestingDataverse dataverse, String from, String until, List harvestedDatasetIds, List failedIdentifiers, MutableBoolean harvestErrorOccurred) - throws java.io.IOException, ParserConfigurationException, SAXException, TransformerException, JAXBException { - String encodedSet = dataverse.getHarvestingSet() == null ? 
null : URLEncoder.encode(dataverse.getHarvestingSet(), "UTF-8"); - ListIdentifiers listIdentifiers = null; - - if (resumptionToken == null) { - listIdentifiers = new ListIdentifiers(dataverse.getServerUrl(), - from, - until, - encodedSet, - URLEncoder.encode(dataverse.getHarvestFormatType().getMetadataPrefix(), "UTF-8")); - } else { - hdLogger.log(Level.INFO, "harvestFromIdentifiers(), resumptionToken=" + resumptionToken.getValue()); - listIdentifiers = new ListIdentifiers(dataverse.getServerUrl(), resumptionToken.getValue()); - } + + String baseOaiUrl = harvestingClient.getHarvestingUrl(); + String metadataPrefix = harvestingClient.getMetadataPrefix(); + Date fromDate = harvestingClient.getLastNonEmptyHarvestTime(); + + String set = harvestingClient.getHarvestingSet() == null ? null : URLEncoder.encode(harvestingClient.getHarvestingSet(), "UTF-8"); + + hdLogger.log(Level.INFO, "BEGIN HARVEST..., oaiUrl=" + baseOaiUrl + ",set=" + set + ", metadataPrefix=" + metadataPrefix + ", from=" + fromDate); - Document doc = listIdentifiers.getDocument(); + ListIdentifiersParameters parameters = buildParams(metadataPrefix, set, fromDate); + ServiceProvider serviceProvider = getServiceProvider(baseOaiUrl, Granularity.Second); - // JAXBContext jc = JAXBContext.newInstance("edu.harvard.hmdc.vdcnet.jaxb.oai"); - // Unmarshaller unmarshaller = jc.createUnmarshaller(); - JAXBElement unmarshalObj = (JAXBElement) unmarshaller.unmarshal(doc); - OAIPMHtype oaiObj = (OAIPMHtype) unmarshalObj.getValue(); + try { + for (Iterator
<Header> idIter = serviceProvider.listIdentifiers(parameters); idIter.hasNext();) {
+                
+                Header h = idIter.next();
+                String identifier = h.getIdentifier();
+                hdLogger.fine("identifier: " + identifier);
+                
+                // Retrieve and process this record with a separate GetRecord call:
+                MutableBoolean getRecordErrorOccurred = new MutableBoolean(false);
+                Long datasetId = getRecord(hdLogger, harvestingClient, identifier, metadataPrefix, getRecordErrorOccurred, processedSizeThisBatch);
+                if (datasetId != null) {
+                    harvestedDatasetIds.add(datasetId);
+                }
+                if (getRecordErrorOccurred.booleanValue() == true) {
+                    failedIdentifiers.add(identifier);
+                }
+                
+                // (only add the dataset to the reindex batch if we actually got one back)
+                if (datasetId != null) {
+                    if ( harvestedDatasetIdsThisBatch == null ) {
+                        harvestedDatasetIdsThisBatch = new ArrayList<Long>();
+                    }
+                    harvestedDatasetIdsThisBatch.add(datasetId);
+                }
+                
+                // reindexing in batches? - this is from DVN 3;
+                // we may not need it anymore.
+                if ( processedSizeThisBatch > 10000000 ) {
+                    
+                    hdLogger.log(Level.INFO, "REACHED CONTENT BATCH SIZE LIMIT; calling index ("+ harvestedDatasetIdsThisBatch.size()+" datasets in the batch).");
+                    //indexService.updateIndexList(this.harvestedDatasetIdsThisBatch);
+                    hdLogger.log(Level.INFO, "REINDEX DONE.");
+                    
+                    
+                    processedSizeThisBatch = 0L;
+                    harvestedDatasetIdsThisBatch = null;
+                }
+            }
+        } catch (BadArgumentException e) {
+            throw new IOException("Incorrectly formatted OAI parameter", e);
+        }
-        String logMsg = "Returning from harvestFromIdentifiers";
-
-        if (resumptionToken == null) {
-            logMsg += " resumptionToken is null";
-        } else if (!StringUtil.isEmpty(resumptionToken.getValue())) {
-            logMsg += " resumptionToken is " + resumptionToken.getValue();
-        } else {
-            // Some OAIServers return an empty resumptionToken element when all
-            // the identifiers have been sent, so need to check for this, and
-            // treat it as if resumptiontoken is null.
- logMsg += " resumptionToken is empty, setting return value to null."; - resumptionToken = null; - } - hdLogger.info(logMsg); - return resumptionToken; + + hdLogger.log(Level.INFO, "COMPLETED HARVEST, oaiUrl=" + baseOaiUrl + ",set=" + set + ", metadataPrefix=" + metadataPrefix + ", from=" + fromDate); + + return harvestedDatasetIds; + } - */ - - /* - private void handleOAIError(Logger hdLogger, OAIPMHtype oaiObj, String message) { - for (Iterator it = oaiObj.getError().iterator(); it.hasNext();) { - OAIPMHerrorType error = (OAIPMHerrorType) it.next(); - message += ", error code: " + error.getCode(); - message += ", error value: " + error.getValue(); - hdLogger.log(Level.SEVERE, message); + + private ServiceProvider getServiceProvider(String baseOaiUrl, Granularity oaiGranularity) { + Context context = new Context(); - } + context.withBaseUrl(baseOaiUrl); + context.withGranularity(oaiGranularity); + context.withOAIClient(new HttpOAIClient(baseOaiUrl)); + + ServiceProvider serviceProvider = new ServiceProvider(context); + return serviceProvider; } - */ + + /** + * Creates an XOAI parameters object for the ListIdentifiers call + * + * @param metadataPrefix + * @param set + * @param from + * @return ListIdentifiersParameters + */ + private ListIdentifiersParameters buildParams(String metadataPrefix, String set, Date from) { + ListIdentifiersParameters mip = ListIdentifiersParameters.request(); + mip.withMetadataPrefix(metadataPrefix); - /* - @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) - public Long getRecord(HarvestingDataverse dataverse, String identifier, String metadataPrefix) { - return getRecord(logger, dataverse, identifier, metadataPrefix, null); + if (from != null) { + mip.withFrom(from); + } + + if (set != null) { + mip.withSetSpec(set); + } + return mip; } - */ + @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) - public Long getRecord(Logger hdLogger, Dataverse dataverse, String identifier, String metadataPrefix, MutableBoolean recordErrorOccurred) { + public Long getRecord(Logger hdLogger, HarvestingClient harvestingClient, String identifier, String metadataPrefix, MutableBoolean recordErrorOccurred, Long processedSizeThisBatch) { String errMessage = null; - - HarvestingClient harvestingConfig = dataverse.getHarvestingClientConfig(); - - if (harvestingConfig == null) { - errMessage = "Could not find Harvesting Config for Dataverse id="+dataverse.getId(); - hdLogger.log(Level.SEVERE, errMessage); - return null; - } - Dataset harvestedDataset = null; - String oaiUrl = harvestingConfig.getHarvestingUrl(); + String oaiUrl = harvestingClient.getHarvestingUrl(); + Dataverse parentDataverse = harvestingClient.getDataverse(); + try { hdLogger.log(Level.INFO, "Calling GetRecord: oaiUrl =" + oaiUrl + "?verb=GetRecord&identifier=" + identifier + "&metadataPrefix=" + metadataPrefix); FastGetRecord record = new FastGetRecord(oaiUrl, identifier, metadataPrefix); errMessage = record.getErrorMessage(); - //errMessage=null; if (errMessage != null) { hdLogger.log(Level.SEVERE, "Error calling GetRecord - " + errMessage); @@ -409,26 +388,10 @@ public Long getRecord(Logger hdLogger, Dataverse dataverse, String identifier, S harvestedDataset = null; // TODO: !!! 
import + hdLogger.log(Level.INFO, "Harvest Successful for identifier " + identifier); - - this.processedSizeThisBatch += record.getMetadataFile().length(); - if ( this.harvestedDatasetIdsThisBatch == null ) { - this.harvestedDatasetIdsThisBatch = new ArrayList(); - } - this.harvestedDatasetIdsThisBatch.add(harvestedDataset.getId()); - - // reindexing in batches? - this is from DVN 3; - // we may not need it anymore. - if ( this.processedSizeThisBatch > 10000000 ) { - - hdLogger.log(Level.INFO, "REACHED CONTENT BATCH SIZE LIMIT; calling index ("+this.harvestedDatasetIdsThisBatch.size()+" studies in the batch)."); - //indexService.updateIndexList(this.harvestedDatasetIdsThisBatch); - hdLogger.log(Level.INFO, "REINDEX DONE."); - - - this.processedSizeThisBatch = 0; - this.harvestedDatasetIdsThisBatch = null; - } + + processedSizeThisBatch += record.getMetadataFile().length(); } } catch (Throwable e) { errMessage = "Exception processing getRecord(), oaiUrl=" + oaiUrl + ",identifier=" + identifier + " " + e.getClass().getName() + " " + e.getMessage(); @@ -440,6 +403,7 @@ public Long getRecord(Logger hdLogger, Dataverse dataverse, String identifier, S // If we got an Error from the OAI server or an exception happened during import, then // set recordErrorOccurred to true (if recordErrorOccurred is being used) // otherwise throw an exception (if recordErrorOccurred is not used, i.e null) + if (errMessage != null) { if (recordErrorOccurred != null) { recordErrorOccurred.setValue(true); @@ -451,122 +415,6 @@ public Long getRecord(Logger hdLogger, Dataverse dataverse, String identifier, S return harvestedDataset != null ? harvestedDataset.getId() : null; } - - /* - public List getMetadataFormats(String oaiUrl) { - JAXBElement unmarshalObj; - try { - - Document doc = new ListMetadataFormats(oaiUrl).getDocument(); - JAXBContext jc = JAXBContext.newInstance("edu.harvard.hmdc.vdcnet.jaxb.oai"); - Unmarshaller unmarshaller = jc.createUnmarshaller(); - unmarshalObj = (JAXBElement) unmarshaller.unmarshal(doc); - } catch (TransformerException ex) { - throw new EJBException(ex); - } catch (ParserConfigurationException ex) { - throw new EJBException(ex); - } catch (JAXBException ex) { - throw new EJBException(ex); - } catch (SAXException ex) { - throw new EJBException(ex); - } catch (IOException ex) { - throw new EJBException(ex); - } - - OAIPMHtype OAIObj = (OAIPMHtype) unmarshalObj.getValue(); - if (OAIObj.getError()!=null && OAIObj.getError().size()>0) { - List errList = OAIObj.getError(); - String errMessage=""; - for (OAIPMHerrorType error : OAIObj.getError()){ - errMessage += error.getCode()+ " " +error.getValue(); - } - throw new EJBException(errMessage); - } - ListMetadataFormatsType listMetadataFormats = OAIObj.getListMetadataFormats(); - List formats = null; - if (listMetadataFormats != null) { - formats = new ArrayList(); - for (Iterator it = listMetadataFormats.getMetadataFormat().iterator(); it.hasNext();) { - // Object elem = it.next(); - MetadataFormatType elem = (MetadataFormatType) it.next(); - formats.add(elem.getMetadataPrefix()); - } - } - return formats; - } - */ - - /** - * - * SetDetailBean returned rather than the ListSetsType because we get strange errors when trying - * to refer to JAXB generated classes in both Web and EJB tiers. 
- */ - /* - public List getSets(String oaiUrl) { - JAXBElement unmarshalObj = null; - - try { - ListSets listSets = new ListSets(oaiUrl); - int nodeListLength = listSets.getErrors().getLength(); - if (nodeListLength==1) { - System.out.println("err Node: "+ listSets.getErrors().item(0)); - } - - - Document doc = new ListSets(oaiUrl).getDocument(); - JAXBContext jc = JAXBContext.newInstance("edu.harvard.hmdc.vdcnet.jaxb.oai"); - Unmarshaller unmarshaller = jc.createUnmarshaller(); - unmarshalObj = (JAXBElement) unmarshaller.unmarshal(doc); - } catch (ParserConfigurationException ex) { - throw new EJBException(ex); - } catch (SAXException ex) { - throw new EJBException(ex); - } catch (TransformerException ex) { - throw new EJBException(ex); - } catch (IOException ex) { - throw new EJBException(ex); - } catch (JAXBException ex) { - throw new EJBException(ex); - } - List sets = null; - Object value = unmarshalObj.getValue(); - - Package valPackage = value.getClass().getPackage(); - if (value instanceof edu.harvard.hmdc.vdcnet.jaxb.oai.OAIPMHtype) { - OAIPMHtype OAIObj = (OAIPMHtype) value; - if (OAIObj.getError()!=null && OAIObj.getError().size()>0 ) { - List errList = OAIObj.getError(); - String errMessage=""; - for (OAIPMHerrorType error : OAIObj.getError()){ - // NO_SET_HIERARCHY is not an error from the perspective of the DVN, - // it just means that the OAI server doesn't support sets. - if (!error.getCode().equals(OAIPMHerrorcodeType.NO_SET_HIERARCHY)) { - errMessage += error.getCode()+ " " +error.getValue(); - } - } - if (errMessage!="") { - throw new EJBException(errMessage); - } - - } - - ListSetsType listSetsType = OAIObj.getListSets(); - if (listSetsType != null) { - sets = new ArrayList(); - for (Iterator it = listSetsType.getSet().iterator(); it.hasNext();) { - SetType elem = (SetType) it.next(); - SetDetailBean setDetail = new SetDetailBean(); - setDetail.setName(elem.getSetName()); - setDetail.setSpec(elem.getSetSpec()); - sets.add(setDetail); - } - } - } - return sets; - } - */ - - private void logException(Throwable e, Logger logger) { boolean cause = false; @@ -587,28 +435,4 @@ private void logException(Throwable e, Logger logger) { logger.severe(fullMessage); } - /* - Most likely not needed any more: - public List findAllHarvestFormatTypes() { - String queryStr = "SELECT f FROM HarvestFormatType f"; - Query query = em.createQuery(queryStr); - return query.getResultList(); - } - - public HarvestFormatType findHarvestFormatTypeByMetadataPrefix(String metadataPrefix) { - String queryStr = "SELECT f FROM HarvestFormatType f WHERE f.metadataPrefix = '" + metadataPrefix + "'"; - Query query = em.createQuery(queryStr); - List resultList = query.getResultList(); - HarvestFormatType hft = null; - if (resultList.size() > 1) { - throw new EJBException("More than one HarvestFormatType found with metadata Prefix= '" + metadataPrefix + "'"); - } - if (resultList.size() == 1) { - hft = (HarvestFormatType) resultList.get(0); - } - return hft; - } -*/ - - } From edce3df0b296ce4fd2ef3782e195df4d4f954cbb Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 6 May 2016 10:24:19 -0400 Subject: [PATCH 14/37] #3089 Fix links created for notifications --- src/main/java/Bundle.properties | 34 ++++--- src/main/webapp/dataverseuser.xhtml | 152 ++++++++++++++++------------ 2 files changed, 103 insertions(+), 83 deletions(-) diff --git a/src/main/java/Bundle.properties b/src/main/java/Bundle.properties index 584cc9eb1ae..bbf73b805e1 100755 --- a/src/main/java/Bundle.properties +++ 
b/src/main/java/Bundle.properties
@@ -127,25 +127,27 @@ wasPublished=, was published in
 wasReturnedByReviewer=, was returned by the curator of
 toReview=Don't forget to publish it or send it back to the contributor!
 worldMap.added=dataset had a WorldMap layer data added to it.
-notification.welcome=Welcome to {0} Dataverse! Get started by adding or finding data. Have questions? Check out the User Guide or contact Dataverse Support for assistance.
-notification.requestFileAccess=File access requested for dataset: {1}.
-notification.grantFileAccess=Access granted for files in dataset: {1}.
-notification.rejectFileAccess=Access rejected for requested files in dataset: {1}.
-notification.createDataverse={1} was created in {3}. To learn more about what you can do with your dataverse, check out the User Guide.
-notification.createDataset={1} was created in {3}. To learn more about what you can do with a dataset, check out the User Guide.
-notification.wasSubmittedForReview={1}, was submitted for review to be published in {3}. Don't forget to publish it or send it back to the contributor\!
-notification.wasReturnedByReviewer={1}, was returned by the curator of {3}.
-notification.wasPublished={1}, was published in {3}.
-notification.worldMap.added={1}, dataset had WorldMap layer data added to it.
+notification.welcome=Welcome to {0} Dataverse! Get started by adding or finding data. Have questions? Check out the {1} or contact {2} for assistance.
+notification.requestFileAccess=File access requested for dataset: {0}.
+notification.grantFileAccess=Access granted for files in dataset: {0}.
+notification.rejectFileAccess=Access rejected for requested files in dataset: {0}.
+notification.createDataverse={0} was created in {1}. To learn more about what you can do with your dataverse, check out the {2}.
+notification.dataverse.management.title=Dataverse Management - Dataverse User Guide
+notification.createDataset={0} was created in {1}. To learn more about what you can do with a dataset, check out the {2}.
+notification.dataset.management.title=Dataset Management - Dataset User Guide
+notification.wasSubmittedForReview={0}, was submitted for review to be published in {1}. Don't forget to publish it or send it back to the contributor\!
+notification.wasReturnedByReviewer={0}, was returned by the curator of {1}.
+notification.wasPublished={0}, was published in {1}.
+notification.worldMap.added={0}, dataset had WorldMap layer data added to it.
 notification.generic.objectDeleted=The dataverse, dataset, or file for this notification has been deleted.
-notification.access.granted.dataverse=You have been granted the {0} role for {2}.
-notification.access.granted.dataset=You have been granted the {0} role for {2}.
-notification.access.granted.datafile=You have been granted the {0} role for file in {2}.
+notification.access.granted.dataverse=You have been granted the {0} role for {1}.
+notification.access.granted.dataset=You have been granted the {0} role for {1}.
+notification.access.granted.datafile=You have been granted the {0} role for file in {1}.
 notification.access.granted.fileDownloader.additionalDataverse={0} You now have access to all published restricted and unrestricted files in this dataverse.
 notification.access.granted.fileDownloader.additionalDataset={0} You now have access to all published restricted and unrestricted files in this dataset.
-notification.access.revoked.dataverse=You have been removed from a role in {2}.
-notification.access.revoked.dataset=You have been removed from a role in {2}. -notification.access.revoked.datafile=You have been removed from a role in {2}. +notification.access.revoked.dataverse=You have been removed from a role in {0}. +notification.access.revoked.dataset=You have been removed from a role in {0}. +notification.access.revoked.datafile=You have been removed from a role in {0}. removeNotification=Remove Notification groupAndRoles.manageTips=Here is where you can access and manage all the groups you belong to, and the roles you have been assigned. user.signup.tip=Why have a Dataverse account? To create your own dataverse and customize it, add datasets, or request access to restricted files. diff --git a/src/main/webapp/dataverseuser.xhtml b/src/main/webapp/dataverseuser.xhtml index 34fab3097c5..be63a4bbd87 100644 --- a/src/main/webapp/dataverseuser.xhtml +++ b/src/main/webapp/dataverseuser.xhtml @@ -4,7 +4,8 @@ xmlns:f="http://java.sun.com/jsf/core" xmlns:ui="http://java.sun.com/jsf/facelets" xmlns:p="http://primefaces.org/ui" - xmlns:jsf="http://xmlns.jcp.org/jsf"> + xmlns:jsf="http://xmlns.jcp.org/jsf" + xmlns:o="http://omnifaces.org/ui"> @@ -115,103 +116,119 @@ - - - + + + + + #{bundle['header.guides.user']} + + + #{bundle['dataverse']} #{bundle['header.support']} + - - - - - - + + #{item.theObject.getDisplayName()} + + + #{item.theObject.getOwner().getDisplayName()} + + + #{bundle['header.guides.user']} + - - - - - - - + + #{item.theObject.getDataset().getDisplayName()} + + + #{item.theObject.getDataset().getOwner().getDisplayName()} + + + #{bundle['header.guides.user']} + + - - - - + + #{item.theObject.getDataset().getDisplayName()} + + + #{item.theObject.getDataset().getOwner().getDisplayName()} + - - - - - + + #{item.theObject.getDataset().getDisplayName()} + + + #{item.theObject.getDataset().getOwner().getDisplayName()} + + - - - - - + + #{item.theObject.getDataset().getDisplayName()} + + + #{item.theObject.getDataset().getOwner().getDisplayName()} + + - - + + #{item.theObject.displayName} + - - + + #{item.theObject.displayName} + - - + + #{item.theObject.displayName} + - - - - - - - - - + + #{item.theObject.getDataset().getDisplayName()} + - - - - + + + #{item.theObject.getDisplayName()} + + @@ -219,12 +236,12 @@ - - - - + + + #{item.theObject.getDisplayName()} + @@ -237,10 +254,11 @@ - - - - + + + #{item.theObject.getOwner().getDisplayName()} + + @@ -248,27 +266,27 @@ - - - + + #{item.theObject.getDisplayName()} + - - - + + #{item.theObject.getDisplayName()} + - - - + + #{item.theObject.getOwner().getDisplayName()} + From d3e23b2fd25839fc7b63d8f50cc2d2853ef8d6b3 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 6 May 2016 16:06:41 -0400 Subject: [PATCH 15/37] #3089 Saved Search and linked dataverses --- src/main/java/Bundle.properties | 8 ++--- .../harvard/iq/dataverse/DataversePage.java | 36 ++++++++++--------- 2 files changed, 24 insertions(+), 20 deletions(-) diff --git a/src/main/java/Bundle.properties b/src/main/java/Bundle.properties index bbf73b805e1..8a5a14739a7 100755 --- a/src/main/java/Bundle.properties +++ b/src/main/java/Bundle.properties @@ -330,11 +330,11 @@ dataverse.savedsearch.save=Save Linked Search dataverse.savedsearch.dataverse.choose=Choose which of your dataverses you would like to link this search to. dataverse.savedsearch.no.choice=You have one dataverse to which you may add a saved search. 
# Bundle file editors, please note that "dataverse.savedsearch.save.success" is used in a unit test -dataverse.saved.search.success=The saved search has been successfully linked to {1}. +dataverse.saved.search.success=The saved search has been successfully linked to {0}. dataverse.saved.search.failure=The saved search was not able to be linked. -dataverse.linked.success= {0} has been successfully linked to {3}. -dataverse.linked.success.wait= {0} has been successfully linked to {3}. Please wait for its contents to appear. -dataverse.linked.internalerror={0} has been successfully linked to {3} but contents will not appear until an internal error has been fixed. +dataverse.linked.success= {0} has been successfully linked to {1}. +dataverse.linked.success.wait= {0} has been successfully linked to {1}. Please wait for its contents to appear. +dataverse.linked.internalerror={0} has been successfully linked to {1} but contents will not appear until an internal error has been fixed. dataverse.page.pre=Previous dataverse.page.next=Next dataverse.byCategory=Dataverses by Category diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java index 3d5f4139de2..a45b63d5bf3 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java @@ -43,6 +43,7 @@ import javax.ejb.EJBException; import javax.faces.event.ValueChangeEvent; import javax.faces.model.SelectItem; +import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.StringUtils; import org.primefaces.event.TransferEvent; @@ -774,19 +775,23 @@ public String saveLinkedDataverse() { savedSearchService.makeLinksForSingleSavedSearch(dataverseRequest, savedSearchOfChildren, debug); //JsfHelper.addSuccessMessage(dataverse.getDisplayName() + " has been successfully linked to " + linkingDataverse.getDisplayName()); List arguments = new ArrayList<>(); - arguments.add(dataverse.getDisplayName()); - arguments.add(systemConfig.getDataverseSiteUrl()); - arguments.add(linkingDataverse.getAlias()); - arguments.add(linkingDataverse.getDisplayName()); - JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataverse.linked.success", arguments)); + arguments.add(StringEscapeUtils.escapeHtml(dataverse.getDisplayName())); + // arguments.add(systemConfig.getDataverseSiteUrl()); + // arguments.add(linkingDataverse.getAlias()); + // arguments.add(linkingDataverse.getDisplayName()); + String linkString = "" + StringEscapeUtils.escapeHtml(linkingDataverse.getDisplayName()) + ""; + arguments.add(linkString); + + System.out.print("Linking Success message " + BundleUtil.getStringFromBundle("dataverse.linked.success", arguments)); // + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataverse.linked.success", arguments)); + return "/dataverse.xhtml?alias=" + dataverse.getAlias() + "&faces-redirect=true"; } catch (SearchException | CommandException ex) { // error: solr is down, etc. 
can't link children right now List arguments = new ArrayList<>(); - arguments.add(dataverse.getDisplayName()); - arguments.add(linkingDataverse.getAlias()); - arguments.add(systemConfig.getDataverseSiteUrl()); - arguments.add(linkingDataverse.getDisplayName()); + arguments.add(StringEscapeUtils.escapeHtml(dataverse.getDisplayName())); + String linkString = "" + StringEscapeUtils.escapeHtml(linkingDataverse.getDisplayName()) + ""; + arguments.add(linkString); JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataverse.linked.internalerror", arguments)); String msg = dataverse.getDisplayName() + " has been successfully linked to " + linkingDataverse.getDisplayName() + " but contents will not appear until an internal error has been fixed."; logger.log(Level.SEVERE, "{0} {1}", new Object[]{msg, ex}); @@ -797,10 +802,9 @@ public String saveLinkedDataverse() { // defer: please wait for the next timer/cron job //JsfHelper.addSuccessMessage(dataverse.getDisplayName() + " has been successfully linked to " + linkingDataverse.getDisplayName() + ". Please wait for its contents to appear."); List arguments = new ArrayList<>(); - arguments.add(dataverse.getDisplayName()); - arguments.add(systemConfig.getDataverseSiteUrl()); - arguments.add(linkingDataverse.getAlias()); - arguments.add(linkingDataverse.getDisplayName()); + arguments.add(StringEscapeUtils.escapeHtml(dataverse.getDisplayName())); + String linkString = "" + StringEscapeUtils.escapeHtml(linkingDataverse.getDisplayName()) + ""; + arguments.add(linkString); JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataverse.linked.success.wait", arguments)); return "/dataverse.xhtml?alias=" + dataverse.getAlias() + "&faces-redirect=true"; } @@ -861,9 +865,9 @@ public String saveSavedSearch() { try { commandEngine.submit(cmd); - List arguments = new ArrayList<>(); - arguments.add(linkingDataverse.getAlias()); - arguments.add(linkingDataverse.getDisplayName()); + List arguments = new ArrayList<>(); + String linkString = "" + StringEscapeUtils.escapeHtml(linkingDataverse.getDisplayName()) + ""; + arguments.add(linkString); String successMessageString = BundleUtil.getStringFromBundle("dataverse.saved.search.success", arguments); JsfHelper.addSuccessMessage(successMessageString); return "/dataverse.xhtml?alias=" + dataverse.getAlias() + "&faces-redirect=true"; From d69980c6c36995f154318ceb8c7e16322983c416 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 6 May 2016 16:56:05 -0400 Subject: [PATCH 16/37] Simplify Success Message build --- .../harvard/iq/dataverse/DataversePage.java | 35 +++++++------------ 1 file changed, 12 insertions(+), 23 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java index a45b63d5bf3..1b8b99740fe 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataversePage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataversePage.java @@ -772,27 +772,12 @@ public String saveLinkedDataverse() { // create links (does indexing) right now (might be expensive) boolean debug = false; DataverseRequest dataverseRequest = new DataverseRequest(savedSearchCreator, SavedSearchServiceBean.getHttpServletRequest()); - savedSearchService.makeLinksForSingleSavedSearch(dataverseRequest, savedSearchOfChildren, debug); - //JsfHelper.addSuccessMessage(dataverse.getDisplayName() + " has been successfully linked to " + linkingDataverse.getDisplayName()); - List arguments = new ArrayList<>(); - 
arguments.add(StringEscapeUtils.escapeHtml(dataverse.getDisplayName())); - // arguments.add(systemConfig.getDataverseSiteUrl()); - // arguments.add(linkingDataverse.getAlias()); - // arguments.add(linkingDataverse.getDisplayName()); - String linkString = "" + StringEscapeUtils.escapeHtml(linkingDataverse.getDisplayName()) + ""; - arguments.add(linkString); - - System.out.print("Linking Success message " + BundleUtil.getStringFromBundle("dataverse.linked.success", arguments)); // - JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataverse.linked.success", arguments)); - + savedSearchService.makeLinksForSingleSavedSearch(dataverseRequest, savedSearchOfChildren, debug); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataverse.linked.success", getSuccessMessageArguments())); return "/dataverse.xhtml?alias=" + dataverse.getAlias() + "&faces-redirect=true"; } catch (SearchException | CommandException ex) { // error: solr is down, etc. can't link children right now - List arguments = new ArrayList<>(); - arguments.add(StringEscapeUtils.escapeHtml(dataverse.getDisplayName())); - String linkString = "" + StringEscapeUtils.escapeHtml(linkingDataverse.getDisplayName()) + ""; - arguments.add(linkString); - JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataverse.linked.internalerror", arguments)); + JsfHelper.addErrorMessage(BundleUtil.getStringFromBundle("dataverse.linked.internalerror", getSuccessMessageArguments())); String msg = dataverse.getDisplayName() + " has been successfully linked to " + linkingDataverse.getDisplayName() + " but contents will not appear until an internal error has been fixed."; logger.log(Level.SEVERE, "{0} {1}", new Object[]{msg, ex}); //JsfHelper.addErrorMessage(msg); @@ -801,14 +786,18 @@ public String saveLinkedDataverse() { } else { // defer: please wait for the next timer/cron job //JsfHelper.addSuccessMessage(dataverse.getDisplayName() + " has been successfully linked to " + linkingDataverse.getDisplayName() + ". 
Please wait for its contents to appear."); - List arguments = new ArrayList<>(); - arguments.add(StringEscapeUtils.escapeHtml(dataverse.getDisplayName())); - String linkString = "" + StringEscapeUtils.escapeHtml(linkingDataverse.getDisplayName()) + ""; - arguments.add(linkString); - JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataverse.linked.success.wait", arguments)); + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataverse.linked.success.wait", getSuccessMessageArguments())); return "/dataverse.xhtml?alias=" + dataverse.getAlias() + "&faces-redirect=true"; } } + + private List getSuccessMessageArguments() { + List arguments = new ArrayList<>(); + arguments.add(StringEscapeUtils.escapeHtml(dataverse.getDisplayName())); + String linkString = "" + StringEscapeUtils.escapeHtml(linkingDataverse.getDisplayName()) + ""; + arguments.add(linkString); + return arguments; + } @Deprecated private SavedSearch createSavedOfCurrentDataverse(AuthenticatedUser savedSearchCreator) { From 0d5bd945173c5a2252d633b21d295dd1b3ac5b51 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Fri, 6 May 2016 17:06:06 -0400 Subject: [PATCH 17/37] Dataset Linking Success Message --- src/main/java/Bundle.properties | 2 +- .../edu/harvard/iq/dataverse/DatasetPage.java | 26 +++++++++---------- 2 files changed, 13 insertions(+), 15 deletions(-) diff --git a/src/main/java/Bundle.properties b/src/main/java/Bundle.properties index 8a5a14739a7..132fe41c2ba 100755 --- a/src/main/java/Bundle.properties +++ b/src/main/java/Bundle.properties @@ -818,7 +818,7 @@ dataset.message.uploadFiles=Upload Dataset Files - You can drag and drop files f dataset.message.editMetadata=Edit Dataset Metadata - Add more metadata about this dataset to help others easily find it. dataset.message.editTerms=Edit Dataset Terms - Update this dataset's terms of use. dataset.message.createSuccess=This dataset has been created. -dataset.message.linkSuccess= {0} has been successfully linked to {3}. +dataset.message.linkSuccess= {0} has been successfully linked to {1}. dataset.message.metadataSuccess=The metadata for this dataset has been updated. dataset.message.termsSuccess=The terms for this dataset has been updated. dataset.message.filesSuccess=The files for this dataset have been updated. 
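A note on the bundle changes in this and the preceding patches: the values are MessageFormat-style patterns, so arguments are now numbered consecutively from {0}, and any markup (such as the link wrapped around the linked dataverse's name) is built by the caller and passed in as an already-escaped argument. A minimal, self-contained sketch of that substitution follows; the pattern mirrors dataset.message.linkSuccess above, while the argument values and the link target are made up for illustration:

    import java.text.MessageFormat;

    // Illustrates the {0}/{1} placeholder convention adopted by these patches.
    // Only the pattern string mirrors the bundle; everything else is hypothetical.
    public class PlaceholderSketch {
        public static void main(String[] args) {
            String pattern = "{0} has been successfully linked to {1}.";
            String datasetName = "My Dataset"; // callers escape this with StringEscapeUtils.escapeHtml
            String linkHtml = "<a href=\"/dataverse/example\">My Dataverse</a>"; // hypothetical markup
            System.out.println(MessageFormat.format(pattern, datasetName, linkHtml));
            // prints: My Dataset has been successfully linked to <a href="/dataverse/example">My Dataverse</a>.
        }
    }

Because the placeholders are filled by the caller, the HTML around the name never has to live in the bundle itself, which is what makes the per-key escaping in these patches possible.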
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index 2c460fcb486..ecc3d549c8c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -73,6 +73,7 @@ import javax.faces.event.AjaxBehaviorEvent; import javax.faces.context.ExternalContext; +import org.apache.commons.lang.StringEscapeUtils; import org.primefaces.component.tabview.TabView; import org.primefaces.event.TabChangeEvent; @@ -2369,7 +2370,16 @@ public void updateFileCounts(){ } } } + + private List getSuccessMessageArguments() { + List arguments = new ArrayList<>(); + arguments.add(StringEscapeUtils.escapeHtml(dataset.getDisplayName())); + String linkString = "" + StringEscapeUtils.escapeHtml(linkingDataverse.getDisplayName()) + ""; + arguments.add(linkString); + return arguments; + } + public String saveLinkedDataset() { if (linkingDataverseId == null) { JsfHelper.addFlashMessage("You must select a linking dataverse."); @@ -2383,15 +2393,7 @@ public String saveLinkedDataset() { LinkDatasetCommand cmd = new LinkDatasetCommand(dvRequestService.getDataverseRequest(), linkingDataverse, dataset); try { commandEngine.submit(cmd); - //JsfHelper.addFlashMessage(JH.localize("dataset.message.linkSuccess") + linkingDataverse.getDisplayName()); - List arguments = new ArrayList(); - arguments.add(dataset.getDisplayName()); - arguments.add(getDataverseSiteUrl()); - arguments.add(linkingDataverse.getAlias()); - arguments.add(linkingDataverse.getDisplayName()); - JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataset.message.linkSuccess", arguments)); - //return ""; - + JsfHelper.addSuccessMessage(BundleUtil.getStringFromBundle("dataset.message.linkSuccess", getSuccessMessageArguments())); } catch (CommandException ex) { String msg = "There was a problem linking this dataset to yours: " + ex; logger.severe(msg); @@ -2400,8 +2402,6 @@ public String saveLinkedDataset() { */ FacesMessage message = new FacesMessage(FacesMessage.SEVERITY_INFO, "DatasetNotLinked", msg); FacesContext.getCurrentInstance().addMessage(null, message); - //return ""; - } return returnToLatestVersion(); } @@ -2411,9 +2411,7 @@ public String saveLinkedDataset() { public boolean isShowAccessPopup() { for (FileMetadata fmd : workingVersion.getFileMetadatas()) { - //System.out.print("restricted :" + fmd.isRestricted()); - //System.out.print("file id :" + fmd.getDataFile().getId()); - + if (fmd.isRestricted()) { if (editMode == EditMode.CREATE) { From d71137409f06da3c13dab7bb730ceb767889dcef Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Mon, 9 May 2016 00:21:56 -0400 Subject: [PATCH 18/37] Further development of the harvesting client; Bug fixes; Completed transition to managing harvesting by configured clients, not by dataverses that have harvested content. 
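The transition described above replaces the Dataverse-keyed timer and harvest entry points with HarvestingClient-keyed ones (see the HarvesterServiceBean and DataverseTimerServiceBean hunks below). For reference, the schedule arithmetic that createHarvestTimer applies to a client's settings can be sketched standalone roughly as follows; the example hour and day values are assumptions, while the Calendar math and interval constants mirror the diff:

    import java.util.Calendar;
    import java.util.Date;

    // Standalone sketch of the daily/weekly timer math in
    // DataverseTimerServiceBean.createHarvestTimer(HarvestingClient).
    public class HarvestScheduleSketch {
        public static void main(String[] args) {
            int scheduleHourOfDay = 2;               // assumed client setting: run at 02:00
            int scheduleDayOfWeek = Calendar.MONDAY; // assumed client setting (weekly only)
            boolean weekly = true;                   // SCHEDULE_PERIOD_WEEKLY vs. SCHEDULE_PERIOD_DAILY

            long intervalDuration = weekly ? 1000L * 60 * 60 * 24 * 7
                                           : 1000L * 60 * 60 * 24;

            Calendar initExpiration = Calendar.getInstance();
            initExpiration.set(Calendar.MINUTE, 0);
            initExpiration.set(Calendar.SECOND, 0);
            initExpiration.set(Calendar.HOUR_OF_DAY, scheduleHourOfDay);
            if (weekly) {
                initExpiration.set(Calendar.DAY_OF_WEEK, scheduleDayOfWeek);
            }

            Date initExpirationDate = initExpiration.getTime();
            if (initExpirationDate.before(new Date())) {
                // first scheduled moment already passed; push it out one full interval
                initExpirationDate = new Date(initExpiration.getTimeInMillis() + intervalDuration);
            }
            System.out.println("first run: " + initExpirationDate + ", repeat every " + intervalDuration + " ms");
        }
    }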
---
 .../harvard/iq/dataverse/api/Harvesting.java  |  21 ++--
 .../api/imports/ImportServiceBean.java        | 105 ++++++++++++++++++
 .../harvest/client/HarvestTimerInfo.java      |  18 +--
 .../harvest/client/HarvesterServiceBean.java  |  75 ++++++-------
 .../harvest/client/HarvestingClient.java      |   6 +-
 .../client/HarvestingClientServiceBean.java   |   4 +
 .../timer/DataverseTimerServiceBean.java      |  62 +++++++----
 7 files changed, 202 insertions(+), 89 deletions(-)

diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java b/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java
index 9734cbd51eb..37d505ff2be 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java
@@ -9,6 +9,7 @@
 import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestingClientCommand;
 import edu.harvard.iq.dataverse.engine.command.impl.GetHarvestingClientCommand;
 import edu.harvard.iq.dataverse.engine.command.impl.UpdateHarvestingClientCommand;
+import edu.harvard.iq.dataverse.harvest.client.ClientHarvestRun;
 import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean;
 import edu.harvard.iq.dataverse.harvest.client.HarvestingClientServiceBean;
 import edu.harvard.iq.dataverse.util.json.JsonParseException;
@@ -224,7 +225,7 @@ public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") S
     // This POST starts a new harvesting run:
     @POST
     @Path("{nickName}/run")
-    public Response startHarvestingJob(@PathParam("dataverseAlias") String dataverseAlias, @QueryParam("key") String apiKey) throws IOException {
+    public Response startHarvestingJob(@PathParam("nickName") String clientNickname, @QueryParam("key") String apiKey) throws IOException {
         try {
             AuthenticatedUser authenticatedUser = null;
@@ -239,22 +240,17 @@ public Response startHarvestingJob(@PathParam("dataverse
                 return errorResponse(Response.Status.FORBIDDEN, "Only the Dataverse Admin user can run harvesting jobs");
             }
-            Dataverse dataverse = dataverseService.findByAlias(dataverseAlias);
+            HarvestingClient harvestingClient = harvestingClientService.findByNickname(clientNickname);
-            if (dataverse == null) {
-                return errorResponse(Response.Status.NOT_FOUND, "No such dataverse: "+dataverseAlias);
+            if (harvestingClient == null) {
+                return errorResponse(Response.Status.NOT_FOUND, "No such harvesting client: "+clientNickname);
             }
-            if (!dataverse.isHarvested()) {
-                return errorResponse(Response.Status.BAD_REQUEST, "Not a HARVESTING dataverse: "+dataverseAlias);
-            }
-
-            //DataverseRequest dataverseRequest = createDataverseRequest(authenticatedUser);
-
-            harvesterService.doAsyncHarvest(dataverse);
+            DataverseRequest dataverseRequest = createDataverseRequest(authenticatedUser);
+            harvesterService.doAsyncHarvest(dataverseRequest, harvestingClient);
         } catch (Exception e) {
-            return this.errorResponse(Response.Status.BAD_REQUEST, "Exception thrown when running a Harvest on dataverse \""+dataverseAlias+"\" via REST API; " + e.getMessage());
+            return this.errorResponse(Response.Status.BAD_REQUEST, "Exception thrown when running harvesting client \""+clientNickname+"\" via REST API; " + e.getMessage());
         }
         return this.accepted();
     }
@@ -287,6 +283,7 @@ public static JsonObjectBuilder harvestingConfigAsJson(HarvestingClient harvesti
         return null;
     }
+
     return jsonObjectBuilder().add("nickName", harvestingConfig.getName()).
             add("dataverseAlias", harvestingConfig.getDataverse().getAlias()).
             add("type", harvestingConfig.getHarvestType()).
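The startHarvestingJob change above turns the run trigger into a nickname-keyed endpoint: look the client up by its configured nickname, return 404 if it is unknown, otherwise kick off the harvest asynchronously and return 202 Accepted. A rough, self-contained JAX-RS sketch of that shape (the class name, base path, and hard-coded lookup are stand-ins, not the patch's actual wiring; only the @Path("{nickName}/run") pattern and the 404/202 responses mirror the diff):

    import javax.ws.rs.POST;
    import javax.ws.rs.Path;
    import javax.ws.rs.PathParam;
    import javax.ws.rs.core.Response;

    @Path("harvest/clients") // stand-in base path; the real class-level @Path is not shown in this hunk
    public class HarvestRunSketch {
        @POST
        @Path("{nickName}/run")
        public Response startRun(@PathParam("nickName") String nickName) {
            if (!"demoClient".equals(nickName)) { // stand-in for harvestingClientService.findByNickname()
                return Response.status(Response.Status.NOT_FOUND).build();
            }
            // in the patch, the harvest itself runs via an @Asynchronous EJB method
            return Response.accepted().build(); // HTTP 202
        }
    }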
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java
index 629032843eb..bc23b0fa10b 100644
--- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java
@@ -185,6 +185,111 @@ public JsonObjectBuilder handleFile(DataverseRequest dataverseRequest, Dataverse
         }
     }
 
+    public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Dataverse owner, String metadataFormat, File metadataFile, PrintWriter cleanupLog) throws ImportException, IOException {
+        Dataset importedDataset = null;
+
+        DatasetDTO dsDTO = null;
+
+        if ("ddi".equals(metadataFormat)) {
+            try {
+                String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath()));
+                dsDTO = importDDIService.doImport(ImportType.HARVEST, xmlToParse);
+            } catch (XMLStreamException e) {
+                throw new ImportException("XMLStreamException" + e);
+            }
+        } // TODO: handle all supported formats; via plugins, probably
+        // (and if the format is already JSON - handle that too!)
+        else {
+            throw new ImportException("Unsupported import metadata format: " + metadataFormat);
+        }
+
+        // convert DTO to Json,
+        Gson gson = new GsonBuilder().setPrettyPrinting().create();
+        String json = gson.toJson(dsDTO);
+        JsonReader jsonReader = Json.createReader(new StringReader(json));
+        JsonObject obj = jsonReader.readObject();
+        //and call parse Json to read it into a dataset
+        try {
+            JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService);
+            parser.setLenient(true);
+            Dataset ds = parser.parseDataset(obj);
+
+            // For ImportType.NEW, if the metadata contains a global identifier, and it's not a protocol
+            // we support, it should be rejected.
+            // (TODO: ! - add some way of keeping track of supported protocols!)
+            //if (ds.getGlobalId() != null && !ds.getProtocol().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, ""))) {
+            //    throw new ImportException("Could not register id " + ds.getGlobalId() + ", protocol not supported");
+            //}
+            ds.setOwner(owner);
+            ds.getLatestVersion().setDatasetFields(ds.getLatestVersion().initDatasetFields());
+
+            // Check data against required constraints
+            List violations = ds.getVersions().get(0).validateRequired();
+            if (!violations.isEmpty()) {
+                // For migration and harvest, add NA for missing required values
+                for (ConstraintViolation v : violations) {
+                    DatasetField f = ((DatasetField) v.getRootBean());
+                    f.setSingleValue(DatasetField.NA_VALUE);
+                }
+            }
+
+            // Check data against validation constraints
+            // If we are migrating and "scrub migration data" is true we attempt to fix invalid data
+            // if the fix fails stop processing of this file by throwing exception
+            Set invalidViolations = ds.getVersions().get(0).validate();
+            ValidatorFactory factory = Validation.buildDefaultValidatorFactory();
+            Validator validator = factory.getValidator();
+            if (!invalidViolations.isEmpty()) {
+                for (ConstraintViolation v : invalidViolations) {
+                    DatasetFieldValue f = ((DatasetFieldValue) v.getRootBean());
+                    boolean fixed = false;
+                    boolean converted = false;
+                    // TODO: Is this scrubbing something we want to continue doing?
+                    if (settingsService.isTrueForKey(SettingsServiceBean.Key.ScrubMigrationData, false)) {
+                        fixed = processMigrationValidationError(f, cleanupLog, metadataFile.getName());
+                        converted = true;
+                        if (fixed) {
+                            Set<ConstraintViolation<DatasetFieldValue>> scrubbedViolations = validator.validate(f);
+                            if (!scrubbedViolations.isEmpty()) {
+                                fixed = false;
+                            }
+                        }
+                    }
+                    if (!fixed) {
+                        String msg = "Data modified - File: " + metadataFile.getName() + "; Field: " + f.getDatasetField().getDatasetFieldType().getDisplayName() + "; "
+                                + "Invalid value: '" + f.getValue() + "'" + " Converted Value:'" + DatasetField.NA_VALUE + "'";
+                        cleanupLog.println(msg);
+                        f.setValue(DatasetField.NA_VALUE);
+
+                    }
+                }
+            }
+
+            Dataset existingDs = datasetService.findByGlobalId(ds.getGlobalId());
+
+            if (existingDs != null) {
+                // For harvested datasets, there should always only be one version.
+                // We will replace the current version with the imported version.
+                if (existingDs.getVersions().size() != 1) {
+                    throw new ImportException("Error importing Harvested Dataset, existing dataset has " + existingDs.getVersions().size() + " versions");
+                }
+                engineSvc.submit(new DestroyDatasetCommand(existingDs, dataverseRequest));
+                importedDataset = engineSvc.submit(new CreateDatasetCommand(ds, dataverseRequest, false, ImportType.HARVEST));
+
+            } else {
+                importedDataset = engineSvc.submit(new CreateDatasetCommand(ds, dataverseRequest, false, ImportType.HARVEST));
+            }
+
+        } catch (JsonParseException ex) {
+            logger.log(Level.INFO, "Error parsing datasetVersion: {0}", ex.getMessage());
+            throw new ImportException("Error parsing datasetVersion: " + ex.getMessage(), ex);
+        } catch (CommandException ex) {
+            logger.log(Level.INFO, "Error executing Create dataset command: {0}", ex.getMessage());
+            throw new ImportException("Error executing dataverse command: " + ex.getMessage(), ex);
+        }
+        return importedDataset;
+    }
+
     public JsonObjectBuilder doImport(DataverseRequest dataverseRequest, Dataverse owner, String xmlToParse, String fileName, ImportType importType, PrintWriter cleanupLog) throws ImportException, IOException {
 
         String status = "";
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestTimerInfo.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestTimerInfo.java
index 96599352cf2..b9db5f22d02 100644
--- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestTimerInfo.java
+++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestTimerInfo.java
@@ -23,8 +23,8 @@
 /**
  * This class is used when creating an EJB Timer for scheduling Harvesting.
- * We use this class rather than the HarvestingDataverse entity because
- * the class must be Serializable, and there is too much info associated with the HarvestingDataverse
+ * We use this class rather than the HarvestingClient entity because
+ * the class must be Serializable, and there is too much info associated with the HarvestingClient
 * in order to realistically serialize it. (We can't make related mapped entities transient.)
* * Based on the DVN 3 implementation, @@ -34,7 +34,7 @@ * @author Leonid Andreev */ public class HarvestTimerInfo implements Serializable { - private Long harvestingDataverseId; + private Long harvestingClientId; private String name; private String schedulePeriod; private Integer scheduleHourOfDay; @@ -44,8 +44,8 @@ public HarvestTimerInfo() { } - public HarvestTimerInfo(Long harvestingDataverseId, String name, String schedulePeriod, Integer scheduleHourOfDay, Integer scheduleDayOfWeek) { - this.harvestingDataverseId=harvestingDataverseId; + public HarvestTimerInfo(Long harvestingClientId, String name, String schedulePeriod, Integer scheduleHourOfDay, Integer scheduleDayOfWeek) { + this.harvestingClientId=harvestingClientId; this.name=name; this.schedulePeriod=schedulePeriod; this.scheduleDayOfWeek=scheduleDayOfWeek; @@ -53,12 +53,12 @@ public HarvestTimerInfo(Long harvestingDataverseId, String name, String schedule } - public Long getHarvestingDataverseId() { - return harvestingDataverseId; + public Long getHarvestingClientId() { + return harvestingClientId; } - public void setHarvestingDataverseId(Long harvestingDataverseId) { - this.harvestingDataverseId = harvestingDataverseId; + public void setHarvestingClientId(Long harvestingClientId) { + this.harvestingClientId = harvestingClientId; } public String getName() { return name; diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java index 63cd2e380ec..939a9e1e582 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java @@ -46,6 +46,8 @@ import com.lyncode.xoai.serviceprovider.client.HttpOAIClient; import com.lyncode.xoai.serviceprovider.exceptions.BadArgumentException; import com.lyncode.xoai.serviceprovider.parameters.ListIdentifiersParameters; +import edu.harvard.iq.dataverse.api.imports.ImportServiceBean; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; /** * @@ -65,6 +67,8 @@ public class HarvesterServiceBean { DataverseTimerServiceBean dataverseTimerService; @EJB HarvestingClientServiceBean harvestingClientService; + @EJB + ImportServiceBean importService; private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean"); private static final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); @@ -72,6 +76,7 @@ public class HarvesterServiceBean { public static final String HARVEST_RESULT_SUCCESS="success"; public static final String HARVEST_RESULT_FAILED="failed"; + private static final Long INDEXING_CONTENT_BATCH_SIZE = 10000000L; public HarvesterServiceBean() { @@ -82,12 +87,12 @@ public HarvesterServiceBean() { * Called to run an "On Demand" harvest. 
*/
    @Asynchronous
-    public void doAsyncHarvest(Dataverse harvestingDataverse) {
+    public void doAsyncHarvest(DataverseRequest dataverseRequest, HarvestingClient harvestingClient) {
         try {
-            doHarvest(harvestingDataverse.getId());
+            doHarvest(dataverseRequest, harvestingClient.getId());
         } catch (Exception e) {
-            logger.info("Caught exception running an asynchronous harvest (dataverse \""+harvestingDataverse.getAlias()+"\")");
+            logger.info("Caught exception running an asynchronous harvest (harvesting client \""+harvestingClient.getName()+"\")");
         }
     }
@@ -95,30 +100,18 @@
     public void createScheduledHarvestTimers() {
         logger.log(Level.INFO, "HarvesterService: going to (re)create Scheduled harvest timers.");
         dataverseTimerService.removeHarvestTimers();
-        List dataverses = dataverseService.getAllHarvestedDataverses();
-        for (Iterator it = dataverses.iterator(); it.hasNext();) {
-            Dataverse dataverse = (Dataverse) it.next();
-            HarvestingClient harvestingConfig = dataverse.getHarvestingClientConfig();
-            if (harvestingConfig == null) {
-                logger.warning("ERROR: no harvesting config found for dataverse id="+dataverse.getId());
-            } else if (harvestingConfig.isScheduled()) {
-                createHarvestTimer(dataverse);
+        List configuredClients = harvestingClientService.getAllHarvestingClients();
+        for (Iterator it = configuredClients.iterator(); it.hasNext();) {
+            HarvestingClient harvestingConfig = (HarvestingClient) it.next();
+            if (harvestingConfig.isScheduled()) {
+                dataverseTimerService.createHarvestTimer(harvestingConfig);
             }
         }
     }
-
-    public void removeHarvestTimer(Dataverse dataverse) {
-        dataverseTimerService.removeHarvestTimer(dataverse);
-    }
-
-    public void updateHarvestTimer(Dataverse harvestedDataverse) {
-        removeHarvestTimer(harvestedDataverse);
-        createHarvestTimer(harvestedDataverse);
-    }
-
+
     public List getHarvestTimers() {
-        ArrayList timers = new ArrayList();
-        // Clear dataverse timer, if one exists
+        ArrayList timers = new ArrayList<>();
+
         for (Iterator it = timerService.getTimers().iterator(); it.hasNext();) {
             Timer timer = (Timer) it.next();
             if (timer.getInfo() instanceof HarvestTimerInfo) {
@@ -129,6 +122,10 @@ public List getHarvestTimers() {
         return timers;
     }
+    /*
+    This method is implemented in the DataverseTimerServiceBean;
+    TODO: make sure that implementation does everything we need.
+    -- L.A. 4.4, May 08 2016.
private void createHarvestTimer(Dataverse harvestingDataverse) { HarvestingClient harvestingDataverseConfig = harvestingDataverse.getHarvestingClientConfig(); @@ -164,28 +161,25 @@ private void createHarvestTimer(Dataverse harvestingDataverse) { dataverseTimerService.createTimer(initExpirationDate, intervalDuration, new HarvestTimerInfo(harvestingDataverse.getId(), harvestingDataverse.getName(), harvestingDataverseConfig.getSchedulePeriod(), harvestingDataverseConfig.getScheduleHourOfDay(), harvestingDataverseConfig.getScheduleDayOfWeek())); } } + */ /** * Run a harvest for an individual harvesting Dataverse * @param dataverseId */ - public void doHarvest(Long dataverseId) throws IOException { - Dataverse harvestingDataverse = dataverseService.find(dataverseId); - - if (harvestingDataverse == null) { - throw new IOException("No such Dataverse: id="+dataverseId); - } - - HarvestingClient harvestingClientConfig = harvestingDataverse.getHarvestingClientConfig(); + public void doHarvest(DataverseRequest dataverseRequest, Long harvestingClientId) throws IOException { + HarvestingClient harvestingClientConfig = harvestingClientService.find(harvestingClientId); if (harvestingClientConfig == null) { - throw new IOException("Could not find Harvesting Config for Dataverse id="+dataverseId); + throw new IOException("No such harvesting client: id="+harvestingClientId); } + Dataverse harvestingDataverse = harvestingClientConfig.getDataverse(); + MutableBoolean harvestErrorOccurred = new MutableBoolean(false); String logTimestamp = logFormatter.format(new Date()); Logger hdLogger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean." + harvestingDataverse.getAlias() + logTimestamp); - String logFileName = /* TODO: !!!! FileUtil.getImportFileDir() +*/ File.separator + "harvest_" + harvestingDataverse.getAlias() + logTimestamp + ".log"; + String logFileName = "../logs" + File.separator + "harvest_" + harvestingClientConfig.getName() + logTimestamp + ".log"; FileHandler fileHandler = new FileHandler(logFileName); hdLogger.addHandler(fileHandler); List harvestedDatasetIds = null; @@ -208,7 +202,7 @@ public void doHarvest(Long dataverseId) throws IOException { if (harvestingClientConfig.isOai()) { - harvestedDatasetIds = harvestOAI(harvestingClientConfig, hdLogger, harvestErrorOccurred, failedIdentifiers, harvestedDatasetIdsThisBatch); + harvestedDatasetIds = harvestOAI(dataverseRequest, harvestingClientConfig, hdLogger, harvestErrorOccurred, failedIdentifiers, harvestedDatasetIdsThisBatch); } else { throw new IOException("Unsupported harvest type"); @@ -260,7 +254,7 @@ public void doHarvest(Long dataverseId) throws IOException { * @param harvestErrorOccurred have we encountered any errors during harvest? 
* @param failedIdentifiers Study Identifiers for failed "GetRecord" requests */ - private List harvestOAI(HarvestingClient harvestingClient, Logger hdLogger, MutableBoolean harvestErrorOccurred, List failedIdentifiers, List harvestedDatasetIdsThisBatch) + private List harvestOAI(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, Logger hdLogger, MutableBoolean harvestErrorOccurred, List failedIdentifiers, List harvestedDatasetIdsThisBatch) throws IOException, ParserConfigurationException, SAXException, TransformerException { List harvestedDatasetIds = new ArrayList(); @@ -287,7 +281,7 @@ private List harvestOAI(HarvestingClient harvestingClient, Logger hdLogger // Retrieve and process this record with a separate GetRecord call: MutableBoolean getRecordErrorOccurred = new MutableBoolean(false); - Long datasetId = getRecord(hdLogger, harvestingClient, identifier, metadataPrefix, getRecordErrorOccurred, processedSizeThisBatch); + Long datasetId = getRecord(dataverseRequest, hdLogger, harvestingClient, identifier, metadataPrefix, getRecordErrorOccurred, processedSizeThisBatch); if (datasetId != null) { harvestedDatasetIds.add(datasetId); } @@ -302,7 +296,7 @@ private List harvestOAI(HarvestingClient harvestingClient, Logger hdLogger // reindexing in batches? - this is from DVN 3; // we may not need it anymore. - if ( processedSizeThisBatch > 10000000 ) { + if ( processedSizeThisBatch > INDEXING_CONTENT_BATCH_SIZE ) { hdLogger.log(Level.INFO, "REACHED CONTENT BATCH SIZE LIMIT; calling index ("+ harvestedDatasetIdsThisBatch.size()+" datasets in the batch)."); //indexService.updateIndexList(this.harvestedDatasetIdsThisBatch); @@ -359,7 +353,7 @@ private ListIdentifiersParameters buildParams(String metadataPrefix, String set, @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) - public Long getRecord(Logger hdLogger, HarvestingClient harvestingClient, String identifier, String metadataPrefix, MutableBoolean recordErrorOccurred, Long processedSizeThisBatch) { + public Long getRecord(DataverseRequest dataverseRequest, Logger hdLogger, HarvestingClient harvestingClient, String identifier, String metadataPrefix, MutableBoolean recordErrorOccurred, Long processedSizeThisBatch) { String errMessage = null; Dataset harvestedDataset = null; String oaiUrl = harvestingClient.getHarvestingUrl(); @@ -384,10 +378,9 @@ public Long getRecord(Logger hdLogger, HarvestingClient harvestingClient, String } } else { - hdLogger.log(Level.INFO, "Successfully retreived GetRecord response."); - + hdLogger.log(Level.INFO, "Successfully retrieved GetRecord response."); - harvestedDataset = null; // TODO: !!! 
import + harvestedDataset = importService.doImportHarvestedDataset(dataverseRequest, parentDataverse, metadataPrefix, record.getMetadataFile(), null); hdLogger.log(Level.INFO, "Harvest Successful for identifier " + identifier); diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java index a5923ff9d3d..2bf32098dbc 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java @@ -222,10 +222,9 @@ public ClientHarvestRun getLastSuccessfulRun() { return null; } - ClientHarvestRun harvestRun = null; int i = harvestHistory.size() - 1; - while (i > 0) { + while (i > -1) { if (harvestHistory.get(i).isSuccess()) { return harvestHistory.get(i); } @@ -240,10 +239,9 @@ ClientHarvestRun getLastNonEmptyRun() { return null; } - ClientHarvestRun harvestRun = null; int i = harvestHistory.size() - 1; - while (i > 0) { + while (i > -1) { if (harvestHistory.get(i).isSuccess()) { if (harvestHistory.get(i).getHarvestedDatasetCount().longValue() > 0 || harvestHistory.get(i).getDeletedDatasetCount().longValue() > 0) { diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java index 853c511cd72..74634399f5b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java @@ -35,6 +35,10 @@ public class HarvestingClientServiceBean { private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.HarvestingClinetServiceBean"); + public HarvestingClient find(Object pk) { + return (HarvestingClient) em.find(HarvestingClient.class, pk); + } + public HarvestingClient findByNickname(String nickName) { try { return em.createNamedQuery("HarvestingClient.findByNickname", HarvestingClient.class) diff --git a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java index b24be18264a..49bac288008 100644 --- a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java @@ -7,6 +7,11 @@ import edu.harvard.iq.dataverse.Dataverse; import edu.harvard.iq.dataverse.DataverseServiceBean; +import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; +import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUser; +import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; +import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.harvest.client.HarvestTimerInfo; import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; @@ -29,6 +34,7 @@ import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; + /** * * @author roberttreacy @@ -46,6 +52,8 @@ public class DataverseTimerServiceBean implements Serializable { DataverseServiceBean dataverseService; @EJB HarvestingClientServiceBean harvestingClientService; + @EJB + AuthenticationServiceBean authSvc; /*@EJB StudyServiceLocal 
studyService;*/ @@ -83,8 +91,22 @@ public void handleTimeout(javax.ejb.Timer timer) { HarvestTimerInfo info = (HarvestTimerInfo) timer.getInfo(); try { - logger.log(Level.INFO, "running a harvester client configured for dataverse " + info.getHarvestingDataverseId()); - harvesterService.doHarvest(info.getHarvestingDataverseId()); + logger.log(Level.INFO, "running a harvesting client: id=" + info.getHarvestingClientId()); + // Timer batch jobs are run by the main Admin user. + // TODO: revisit how we retrieve the superuser here. + // (looking it up by the identifier "admin" is not necessarily the + // cleanest way). Should it be configurable somewhere, which superuser + // runs these jobs? Should there be a central mechanism for obtaining + // the "major", builtin superuser for this Dataverse instance? + // -- L.A. 4.4, May 8 2016 + DataverseRequest dataverseRequest = null; + AuthenticatedUser adminUser = authSvc.getAuthenticatedUser("admin"); + if (adminUser != null) { + dataverseRequest = new DataverseRequest(adminUser, null); + } + // TODO: create a real DataverseRequest here, associated with the main admin user (?) + // -- L.A. 4.4, May 8 2016 + harvesterService.doHarvest(dataverseRequest, info.getHarvestingClientId()); } catch (Throwable e) { // Harvester Service should be handling any error notifications, @@ -138,30 +160,24 @@ public void removeHarvestTimers() { } } - private void createHarvestTimer(Dataverse harvestedDataverse) { - HarvestingClient harvestedDataverseConfig = harvestedDataverse.getHarvestingClientConfig(); - - if (harvestedDataverseConfig == null) { - logger.info("ERROR: No Harvesting Configuration found for dataverse id="+harvestedDataverse.getId()); - return; - } + public void createHarvestTimer(HarvestingClient harvestingClient) { - if (harvestedDataverseConfig.isScheduled()) { + if (harvestingClient.isScheduled()) { long intervalDuration = 0; Calendar initExpiration = Calendar.getInstance(); initExpiration.set(Calendar.MINUTE, 0); initExpiration.set(Calendar.SECOND, 0); - if (harvestedDataverseConfig.getSchedulePeriod().equals(HarvestingClient.SCHEDULE_PERIOD_DAILY)) { + if (harvestingClient.getSchedulePeriod().equals(HarvestingClient.SCHEDULE_PERIOD_DAILY)) { intervalDuration = 1000 * 60 * 60 * 24; - initExpiration.set(Calendar.HOUR_OF_DAY, harvestedDataverseConfig.getScheduleHourOfDay()); + initExpiration.set(Calendar.HOUR_OF_DAY, harvestingClient.getScheduleHourOfDay()); - } else if (harvestedDataverseConfig.getSchedulePeriod().equals(harvestedDataverseConfig.SCHEDULE_PERIOD_WEEKLY)) { + } else if (harvestingClient.getSchedulePeriod().equals(harvestingClient.SCHEDULE_PERIOD_WEEKLY)) { intervalDuration = 1000 * 60 * 60 * 24 * 7; - initExpiration.set(Calendar.HOUR_OF_DAY, harvestedDataverseConfig.getScheduleHourOfDay()); - initExpiration.set(Calendar.DAY_OF_WEEK, harvestedDataverseConfig.getScheduleDayOfWeek()); + initExpiration.set(Calendar.HOUR_OF_DAY, harvestingClient.getScheduleHourOfDay()); + initExpiration.set(Calendar.DAY_OF_WEEK, harvestingClient.getScheduleDayOfWeek()); } else { - logger.log(Level.WARNING, "Could not set timer for harvestedDataverse id, " + harvestedDataverse.getId() + ", unknown schedule period: " + harvestedDataverseConfig.getSchedulePeriod()); + logger.log(Level.WARNING, "Could not set timer for harvesting client id=" + harvestingClient.getId() + ", unknown schedule period: " + harvestingClient.getSchedulePeriod()); return; } Date initExpirationDate = initExpiration.getTime(); @@ -169,18 +185,18 @@ private void 
createHarvestTimer(Dataverse harvestedDataverse) { if (initExpirationDate.before(currTime)) { initExpirationDate.setTime(initExpiration.getTimeInMillis() + intervalDuration); } - logger.log(Level.INFO, "Setting timer for dataverse " + harvestedDataverse.getName() + ", initial expiration: " + initExpirationDate); - createTimer(initExpirationDate, intervalDuration, new HarvestTimerInfo(harvestedDataverse.getId(), harvestedDataverse.getName(), harvestedDataverseConfig.getSchedulePeriod(), harvestedDataverseConfig.getScheduleHourOfDay(), harvestedDataverseConfig.getScheduleDayOfWeek())); + logger.log(Level.INFO, "Setting timer for harvesting client " + harvestingClient.getName() + ", initial expiration: " + initExpirationDate); + createTimer(initExpirationDate, intervalDuration, new HarvestTimerInfo(harvestingClient.getId(), harvestingClient.getName(), harvestingClient.getSchedulePeriod(), harvestingClient.getScheduleHourOfDay(), harvestingClient.getScheduleDayOfWeek())); } } - public void updateHarvestTimer(Dataverse harvestedDataverse) { - removeHarvestTimer(harvestedDataverse); - createHarvestTimer(harvestedDataverse); + public void updateHarvestTimer(HarvestingClient harvestingClient) { + removeHarvestTimer(harvestingClient); + createHarvestTimer(harvestingClient); } - public void removeHarvestTimer(Dataverse harvestedDataverse) { + public void removeHarvestTimer(HarvestingClient harvestingClient) { // Clear dataverse timer, if one exists try { logger.log(Level.INFO,"Removing harvest timer on " + InetAddress.getLocalHost().getCanonicalHostName()); @@ -191,7 +207,7 @@ public void removeHarvestTimer(Dataverse harvestedDataverse) { Timer timer = (Timer) it.next(); if (timer.getInfo() instanceof HarvestTimerInfo) { HarvestTimerInfo info = (HarvestTimerInfo) timer.getInfo(); - if (info.getHarvestingDataverseId().equals(harvestedDataverse.getId())) { + if (info.getHarvestingClientId().equals(harvestingClient.getId())) { timer.cancel(); } } From aef0d7d1378406072c5378359c740d47c4d79bca Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 9 May 2016 14:23:06 -0400 Subject: [PATCH 19/37] Remove Root DV Name from Support Link Title --- src/main/webapp/resources/iqbs/messages.xhtml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/main/webapp/resources/iqbs/messages.xhtml b/src/main/webapp/resources/iqbs/messages.xhtml index 6dfafef95e8..f4fd0f53e94 100644 --- a/src/main/webapp/resources/iqbs/messages.xhtml +++ b/src/main/webapp/resources/iqbs/messages.xhtml @@ -21,7 +21,7 @@
 Error - Please contact Dataverse Support for assistance. + Please contact Dataverse Support for assistance.
@@ -48,7 +48,7 @@  #{msg.summary}  - Please contact Dataverse Support for assistance. + Please contact Dataverse Support for assistance. Date/Time: From 09454b2816674ec1b223a4db77d09457f170f412 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Mon, 9 May 2016 15:19:31 -0400 Subject: [PATCH 20/37] Fix BundleUtilTest --- src/test/java/edu/harvard/iq/dataverse/util/BundleUtilTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/test/java/edu/harvard/iq/dataverse/util/BundleUtilTest.java b/src/test/java/edu/harvard/iq/dataverse/util/BundleUtilTest.java index 22938e7da78..8646fad3b8a 100644 --- a/src/test/java/edu/harvard/iq/dataverse/util/BundleUtilTest.java +++ b/src/test/java/edu/harvard/iq/dataverse/util/BundleUtilTest.java @@ -52,7 +52,7 @@ public void testGetStringFromBundleWithArguments() { assertEquals("The saved search has been successfully linked to " + "DV Name.", BundleUtil.getStringFromBundle("dataverse.saved.search.success", - Arrays.asList("dvAlias", "DV Name"))); + Arrays.asList("DV Name"))); } @Test From edec6828a06781c25c00eab56e52d2a072b7588c Mon Sep 17 00:00:00 2001 From: Gustavo Durand Date: Mon, 9 May 2016 17:35:45 -0400 Subject: [PATCH 21/37] fix for escaping linked titles on My Data page --- src/main/webapp/resources/js/mydata.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/resources/js/mydata.js b/src/main/webapp/resources/js/mydata.js index 436524b88ea..5a1b83212e1 100644 --- a/src/main/webapp/resources/js/mydata.js +++ b/src/main/webapp/resources/js/mydata.js @@ -59,7 +59,7 @@ function init_mydata_page(){ //console.log('init_mydata_page'); $('#div-more-cards-link').hide(); // var env = new nunjucks.Environment(new nunjucks.WebLoader('/mydata_templates'), true); - //nunjucks.configure({ autoescape: true }); + nunjucks.configure({ autoescape: true }); // Capture checkbox clicks // From f4cc04fa398310782a9674308ffd1271b2ceaee2 Mon Sep 17 00:00:00 2001 From: Gustavo Durand Date: Mon, 9 May 2016 17:42:50 -0400 Subject: [PATCH 22/37] fix for escaping linked titles in Citation (added new DataCitation class) --- .../harvard/iq/dataverse/DataCitation.java | 228 ++++++++++++++++++ .../harvard/iq/dataverse/DatasetVersion.java | 141 +---------- 2 files changed, 232 insertions(+), 137 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/DataCitation.java diff --git a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java new file mode 100644 index 00000000000..8f68ffbf0cb --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java @@ -0,0 +1,228 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. 
+ */ +package edu.harvard.iq.dataverse; + +import java.net.URL; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import org.apache.commons.lang.StringEscapeUtils; +import org.apache.commons.lang.StringUtils; + +/** + * + * @author gdurand + */ +public class DataCitation { + + private String authors; + private String title; + private Date citationDate; + private GlobalId persistentId; + private String version; + private String UNF; + private String distributors; + + public DataCitation(String authors, String title, Date citationDate, GlobalId persistentId, String version, String UNF, String distributors) { + this.authors = authors; + this.title = title; + this.citationDate = citationDate; + this.persistentId = persistentId; + this.version = version; + this.UNF = UNF; + this.distributors = distributors; + } + + public DataCitation(DatasetVersion dsv) { + // authors (or producer) + authors = dsv.getAuthorsStr(false); + if (StringUtils.isEmpty(authors)) { + authors = dsv.getDatasetProducersString(); + } + + // citation date + if (!dsv.getDataset().isHarvested()) { + citationDate = dsv.getCitationDate(); + if (citationDate == null) { + if (dsv.getDataset().getPublicationDate() != null) { + citationDate = dsv.getDataset().getPublicationDate(); + } else { // for drafts + citationDate = new Date(); + } + } + } else { + try { + citationDate = new SimpleDateFormat("yyyy").parse(dsv.getDistributionDate()); + } catch (ParseException ex) { + // ignore + } + } + + // title + title = dsv.getTitle(); + + // The Global Identifier: + // It is always part of the citation for the local datasets; + // And for *some* harvested datasets. + if (!dsv.getDataset().isHarvested() + || HarvestingDataverseConfig.HARVEST_STYLE_VDC.equals(dsv.getDataset().getOwner().getHarvestingDataverseConfig().getHarvestStyle()) + || HarvestingDataverseConfig.HARVEST_STYLE_ICPSR.equals(dsv.getDataset().getOwner().getHarvestingDataverseConfig().getHarvestStyle()) + || HarvestingDataverseConfig.HARVEST_STYLE_DATAVERSE.equals(dsv.getDataset().getOwner().getHarvestingDataverseConfig().getHarvestStyle())) { + if (!StringUtils.isEmpty(dsv.getDataset().getIdentifier())) { + persistentId = new GlobalId(dsv.getDataset().getGlobalId()); + } + } + + // distributors + if (!dsv.getDataset().isHarvested()) { + distributors = dsv.getRootDataverseNameforCitation(); + } else { + distributors = dsv.getDistributorName(); + if (!StringUtils.isEmpty(distributors)) { + distributors += " [distributor]"; + } + } + + // version + if (!dsv.getDataset().isHarvested()) { + if (dsv.isDraft()) { + version = "DRAFT VERSION"; + } else if (dsv.getVersionNumber() != null) { + version = "V" + dsv.getVersionNumber(); + if (dsv.isDeaccessioned()) { + version += ", DEACCESSIONED VERSION"; + } + } + } + + // UNF + UNF = dsv.getUNF(); + + } + + public String getAuthors() { + return authors; + } + + public void setAuthors(String authors) { + this.authors = authors; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public Date getCitationDate() { + return citationDate; + } + + public void setCitationDate(Date citationDate) { + this.citationDate = citationDate; + } + + public GlobalId getPersistentId() { + return persistentId; + } + + public void setPersistentId(GlobalId persistentId) { + this.persistentId = persistentId; + } + + public String getVersion() { + return version; + } + + public void 
setVersion(String version) { + this.version = version; + } + + public String getUNF() { + return UNF; + } + + public void setUNF(String UNF) { + this.UNF = UNF; + } + + public String getDistributors() { + return distributors; + } + + public void setDistributors(String distributors) { + this.distributors = distributors; + } + + @Override + public String toString() { + return toString(false); + } + + public String toString(boolean html) { + + List citationList = new ArrayList<>(); + + // first add comma separated parts + addNonEmptyStringToList(citationList, formatString(authors, html)); + addNonEmptyStringToList(citationList, formatNonEmptyDate(citationDate,"yyyy")); + addNonEmptyStringToList(citationList, formatString(title, html, "\"")); + addNonEmptyStringToList(citationList, formatURL(persistentId.toURL(), html)); + addNonEmptyStringToList(citationList, formatString(distributors, html)); + addNonEmptyStringToList(citationList, version); + StringBuilder citation = new StringBuilder(StringUtils.join(citationList, ", ")); + + // append UNF + if (!StringUtils.isEmpty(UNF)) { + citation.append( " [").append(UNF).append("]"); + } + + return citation.toString(); + } + + + // helper methods + private void addNonEmptyStringToList(List list, String value) { + if (!StringUtils.isEmpty(value)) { + list.add(value); + } + } + + private String formatString(String value, boolean escapeHtml) { + return formatString(value, escapeHtml, ""); + } + + + private String formatString(String value, boolean escapeHtml, String wrapper) { + if (!StringUtils.isEmpty(value)) { + return new StringBuilder(wrapper) + .append(escapeHtml ? StringEscapeUtils.escapeHtml(value) : value) + .append(wrapper).toString(); + } + return null; + } + + private String formatNonEmptyDate(Date date, String format) { + return date == null ? 
null : new SimpleDateFormat(format).format(date);
    }

    private String formatURL(URL value, boolean html) {
        if (value ==null) {
            return null;
        }

        if (html) {
            return "<a href=\"" + value.toString() + "\" target=\"_blank\">" + value.toString() + "</a>";
        } else {
            return value.toString();
        }

    }
}
diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
index 311851eda6b..469375828c9 100644
--- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
+++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java
@@ -41,6 +41,7 @@
 import javax.validation.Validation;
 import javax.validation.Validator;
 import javax.validation.ValidatorFactory;
+import org.apache.commons.lang.StringEscapeUtils;
 
 /**
  *
@@ -742,145 +743,11 @@ public String getCitation() {
         return getCitation(false);
     }
 
-    public String getCitation(boolean isOnlineVersion) {
-
-        String str = "";
-
-        boolean includeAffiliation = false;
-        String authors = this.getAuthorsStr(includeAffiliation);
-        if (!StringUtil.isEmpty(authors)) {
-            str += authors;
-        } else {
-            str += getDatasetProducersString();
-        }
-
-        Date citationDate = getCitationDate();
-        if (citationDate != null) {
-            if (!StringUtil.isEmpty(str)) {
-                str += ", ";
-            }
-            str += new SimpleDateFormat("yyyy").format(citationDate);
-
-        } else {
-            if (this.getDataset().getPublicationDate() == null || StringUtil.isEmpty(this.getDataset().getPublicationDate().toString())) {
-
-                if (!this.getDataset().isHarvested()) {
-                    //if not released use current year
-                    if (!StringUtil.isEmpty(str)) {
-                        str += ", ";
-                    }
-                    str += new SimpleDateFormat("yyyy").format(new Timestamp(new Date().getTime()));
-                } else {
-                    String distDate = getDistributionDate();
-                    if (distDate != null) {
-                        if (!StringUtil.isEmpty(str)) {
-                            str += ", ";
-                        }
-                        str += distDate;
-                    }
-                }
-            } else {
-                if (!StringUtil.isEmpty(str)) {
-                    str += ", ";
-                }
-                str += new SimpleDateFormat("yyyy").format(new Timestamp(this.getDataset().getPublicationDate().getTime()));
-            }
-        }
-
-        if (this.getTitle() != null) {
-            if (!StringUtil.isEmpty(this.getTitle())) {
-                if (!StringUtil.isEmpty(str)) {
-                    str += ", ";
-                }
-                str += "\"" + this.getTitle() + "\"";
-            }
-        }
-
-        if (this.getDataset().isHarvested()) {
-            String distributorName = getDistributorName();
-            if (distributorName != null && distributorName.trim().length() > 0) {
-                if (!StringUtil.isEmpty(str)) {
-                    str += ". ";
-                }
-                str += " " + distributorName;
-                str += " [distributor]";
-            }
-        }
-
-        // The Global Identifier:
-        // It is always part of the citation for the local datasets;
-        // And for *some* harvested datasets.
- if (!this.getDataset().isHarvested() - || HarvestingDataverseConfig.HARVEST_STYLE_VDC.equals(this.getDataset().getOwner().getHarvestingDataverseConfig().getHarvestStyle()) - || HarvestingDataverseConfig.HARVEST_STYLE_ICPSR.equals(this.getDataset().getOwner().getHarvestingDataverseConfig().getHarvestStyle()) - || HarvestingDataverseConfig.HARVEST_STYLE_DATAVERSE.equals(this.getDataset().getOwner().getHarvestingDataverseConfig().getHarvestStyle())) { - if (!StringUtil.isEmpty(this.getDataset().getIdentifier())) { - if (!StringUtil.isEmpty(str)) { - str += ", "; - } - if (isOnlineVersion) { - str += "" + this.getDataset().getPersistentURL() + ""; - } else { - str += this.getDataset().getPersistentURL(); - } - } - } - - // Get root dataverse name for Citation - // (only for non-harvested datasets): - if (!this.getDataset().isHarvested()) { - String dataverseName = getRootDataverseNameforCitation(); - if (!StringUtil.isEmpty(dataverseName)) { - if (!StringUtil.isEmpty(str)) { - str += ", "; - } - str += " " + dataverseName; - } - } - - // Version status: - // Again, this is needed for non-harvested stuff only: - // (the check may be redundant - we may already be dropping version - // numbers when harvesting -- L.A. 4.0 beta15) - if (!this.getDataset().isHarvested()) { - if (this.isDraft()) { - if (!StringUtil.isEmpty(str)) { - str += ", "; - } - str += " DRAFT VERSION "; - - } else if (this.getVersionNumber() != null) { - if (!StringUtil.isEmpty(str)) { - str += ", "; - } - str += " V" + this.getVersionNumber(); - - } - if (this.isDeaccessioned()) { - if (!StringUtil.isEmpty(str)) { - str += ", "; - } - str += " DEACCESSIONED VERSION "; - - } - } - - if (!StringUtil.isEmpty(getUNF())) { - if (!StringUtil.isEmpty(str)) { - str += " "; - } - str += "[" + getUNF() + "]"; - } - /* - String distributorNames = getDistributorNames(); - if (distributorNames.trim().length() > 0) { - str += " " + distributorNames; - str += " [Distributor]"; - }*/ - return str; + public String getCitation(boolean html) { + return new DataCitation(this).toString(html); } - private Date getCitationDate() { + public Date getCitationDate() { DatasetField citationDate = getDatasetField(this.getDataset().getCitationDateDatasetFieldType()); if (citationDate != null && citationDate.getDatasetFieldType().getFieldType().equals(FieldType.DATE)){ try { From da23b730e7a21cec280571d90633ab5d848f468e Mon Sep 17 00:00:00 2001 From: Gustavo Durand Date: Mon, 9 May 2016 18:08:29 -0400 Subject: [PATCH 23/37] changed a call for citation to get html version --- src/main/java/edu/harvard/iq/dataverse/DatasetPage.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java index ecc3d549c8c..c2e18cebef5 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetPage.java @@ -2141,7 +2141,7 @@ public void refresh() { fileMetadatasSearch = workingVersion.getFileMetadatasSorted(); } - displayCitation = dataset.getCitation(false, workingVersion); + displayCitation = dataset.getCitation(true, workingVersion); stateChanged = false; } From 5d0d634622a26816563b71ad3580e90473820027 Mon Sep 17 00:00:00 2001 From: Philip Durbin Date: Tue, 10 May 2016 10:14:09 -0400 Subject: [PATCH 24/37] Indexing: make "citation" plaintext, add "citationHtml" #3089 - Updated Search API Guide to better reflect reality. 
- Added note about "citationHtml" for back compat
- `file_content_type` and `score` added a while ago
- `publicationDate` not `publication_date_s`
- different facets showing
---
 conf/solr/4.6.0/schema.xml | 1 +
 doc/sphinx-guides/source/api/search.rst | 52 ++++++++++--------
 .../iq/dataverse/search/IndexServiceBean.java | 3 +-
 .../iq/dataverse/search/SearchFields.java | 13 ++++-
 .../dataverse/search/SearchServiceBean.java | 2 +
 .../iq/dataverse/search/SolrSearchResult.java | 10 ++++
 .../harvard/iq/dataverse/api/SearchIT.java | 53 ++++++++++++++++++-
 7 files changed, 108 insertions(+), 26 deletions(-)

diff --git a/conf/solr/4.6.0/schema.xml b/conf/solr/4.6.0/schema.xml
index c328c4215af..b9cb29df23f 100644
--- a/conf/solr/4.6.0/schema.xml
+++ b/conf/solr/4.6.0/schema.xml
@@ -290,6 +290,7 @@
 
+
 
diff --git a/doc/sphinx-guides/source/api/search.rst b/doc/sphinx-guides/source/api/search.rst
index 5448a7171aa..84f202a5b83 100755
--- a/doc/sphinx-guides/source/api/search.rst
+++ b/doc/sphinx-guides/source/api/search.rst
@@ -12,6 +12,8 @@ Unlike the web interface, this new API is limited to *published* data until `iss
 
 The parameters and JSON response are partly inspired by the `GitHub Search API `_.
 
+Please note that in Dataverse 4.3 and older the "citation" field wrapped the persistent ID URL in an ``<a>`` tag but this has been changed to plaintext. If you want the old value with HTML in it, a new field called "citationHtml" can be used.
+
 Parameters
 ----------
 
@@ -54,7 +56,7 @@ https://apitest.dataverse.org/api/search?q=trees
        "image_url":"https://apitest.dataverse.org/api/access/dvCardImage/7",
        "identifier":"trees",
        "description":"A tree dataverse with some birds",
-       "published_at":"2015-01-12T16:05:12Z"
+       "published_at":"2016-05-10T12:53:38Z"
      },
      {
        "name":"Chestnut Trees",
@@ -63,20 +65,21 @@
        "image_url":"https://apitest.dataverse.org/api/access/dvCardImage/9",
        "identifier":"chestnuttrees",
        "description":"A dataverse with chestnut trees and an oriole",
-       "published_at":"2015-01-12T18:02:32Z"
+       "published_at":"2016-05-10T12:52:38Z"
      },
      {
        "name":"trees.png",
        "type":"file",
        "url":"https://apitest.dataverse.org/api/access/datafile/12",
-       "image_url":"https://apitest.dataverse.org/api/access/preview/12",
+       "image_url":"https://apitest.dataverse.org/api/access/fileCardImage/12",
        "file_id":"12",
        "description":"",
-       "published_at":"2015-01-12T16:05:44Z",
+       "published_at":"2016-05-10T12:53:39Z",
        "file_type":"PNG Image",
+       "file_content_type":"image/png",
        "size_in_bytes":8361,
        "md5":"0386269a5acb2c57b4eade587ff4db64",
-       "dataset_citation":"Spruce, Sabrina, 2015, \"Spruce Goose\", http://dx.doi.org/10.5072/FK2/Y6RGTQ, Root Dataverse, V1"
+       "dataset_citation":"Spruce, Sabrina, 2016, \"Spruce Goose\", http://dx.doi.org/10.5072/FK2/NFSEHG, Root Dataverse, V1"
      },
      {
        "name":"Birds",
        "type":"dataverse",
        "url":"https://apitest.dataverse.org/dataverse/birds",
        "image_url":"https://apitest.dataverse.org/api/access/dvCardImage/2",
        "identifier":"birds",
        "description":"A bird dataverse with some trees",
-       "published_at":"2015-01-12T18:01:51Z"
+       "published_at":"2016-05-10T12:57:27Z"
      }
    ],
    "count_in_response":4
@@ -97,7 +100,7 @@
 Advanced Search Example
 -----------------------
 
-https://apitest.dataverse.org/api/search?q=finch&show_relevance=true&show_facets=true&fq=publication_date_s:2015&subtree=birds
+https://apitest.dataverse.org/api/search?q=finch&show_relevance=true&show_facets=true&fq=publicationDate:2016&subtree=birds
 
-In this example, ``show_relevance=true`` matches per field are shown. Available facets are shown with ``show_facets=true`` and of the facets is being used with ``fq=publication_date_s:2015``. The search is being narrowed to the dataverse with the identifier "birds" with the parameter ``subtree=birds``.
+In this example, ``show_relevance=true`` matches per field are shown. Available facets are shown with ``show_facets=true`` and one of the facets is being used with ``fq=publicationDate:2016``. The search is being narrowed to the dataverse with the identifier "birds" with the parameter ``subtree=birds``.
 
@@ -119,7 +122,7 @@
        "image_url":"https://apitest.dataverse.org/api/access/dvCardImage/3",
        "identifier":"finches",
        "description":"A dataverse with finches",
-       "published_at":"2015-01-12T18:01:15Z",
+       "published_at":"2016-05-10T12:57:38Z",
        "matches":[
          {
            "description":{
              "snippets":[
                "A dataverse with finches"
              ]
            }
          },
          {
            "name":{
              "snippets":[
                "Finches"
              ]
            }
          }
-       ]
+       ],
+       "score": 3.8500118255615234
      },
      {
        "name":"Darwin's Finches",
        "type":"dataset",
-       "url":"http://dx.doi.org/10.5072/FK2/CE0052",
-       "image_url":"https://apitest.dataverse.org/api/access/dsPreview/2",
-       "global_id":"doi:10.5072/FK2/CE0052",
-       "published_at":"2015-01-12T18:01:37Z",
-       "citation":"Finch, Fiona, 2015, \"Darwin's Finches\", http://dx.doi.org/10.5072/FK2/CE0052, Root Dataverse, V1",
        "description": "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds.",
+       "url":"http://dx.doi.org/10.5072/FK2/G2VPE7",
+       "image_url":"https://apitest.dataverse.org/api/access/dsCardImage/2",
+       "global_id":"doi:10.5072/FK2/G2VPE7",
+       "published_at":"2016-05-10T12:57:45Z",
+       "citationHtml":"Finch, Fiona, 2016, \"Darwin's Finches\", <a href=\"http://dx.doi.org/10.5072/FK2/G2VPE7\" target=\"_blank\">http://dx.doi.org/10.5072/FK2/G2VPE7</a>, Root Dataverse, V1",
+       "citation":"Finch, Fiona, 2016, \"Darwin's Finches\", http://dx.doi.org/10.5072/FK2/G2VPE7, Root Dataverse, V1",
        "matches":[
          {
            "authorName":{
              "snippets":[
                "Finch, Fiona"
              ]
            }
          },
          {
            "dsDescriptionValue":{
              "snippets":[
                "Darwin's finches (also known as the Galápagos finches) are a group of about fifteen species of passerine birds."
              ]
            }
          }
        ],
+       "score": 1.5033848285675049,
        "authors":[
          "Finch, Fiona"
        ]
      }
    ],
@@ -176,27 +182,27 @@ In this example, ``show_relevance=true`` matches per field are shown. Available
    "facets":[
      {
-       "dvCategory_s":{
-         "friendly":"Dataverse Category",
-         "labels":[
-           {
-             "Uncategorized":1
-           }
-         ]
-       },
-       "affiliation_ss":{
-         "friendly":"Affiliation",
-         "labels":[
-           {
-             "Birds Inc.":1
-           }
-         ]
-       },
-       "publication_date_s":{
+       "subject_ss":{
+         "friendly":"Subject",
+         "labels":[
+           {
+             "Medicine, Health and Life Sciences":2
+           }
+         ]
+       },
+       "authorName_ss": {
+         "friendly":"Author Name",
+         "labels": [
+           {
+             "Finch, Fiona":1
+           }
+         ]
+       },
+       "publicationDate":{
          "friendly":"Publication Date",
          "labels":[
            {
-             "2015":2
+             "2016":2
            }
          ]
        }
diff --git a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
index 1a4a178a90b..e81b92d3e11 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/IndexServiceBean.java
@@ -664,7 +664,8 @@ private String addOrUpdateDataset(IndexableDataset indexableDataset) {
 
         if (datasetVersion != null) {
             solrInputDocument.addField(SearchFields.DATASET_VERSION_ID, datasetVersion.getId());
-            solrInputDocument.addField(SearchFields.DATASET_CITATION, datasetVersion.getCitation(true));
+            solrInputDocument.addField(SearchFields.DATASET_CITATION, datasetVersion.getCitation(false));
+            solrInputDocument.addField(SearchFields.DATASET_CITATION_HTML, datasetVersion.getCitation(true));
 
             if (datasetVersion.isInReview()) {
                 solrInputDocument.addField(SearchFields.PUBLICATION_STATUS, IN_REVIEW_STRING);
diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java
index c34dc7f518d..5ea6a52748f 100644
--- a/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java
+++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchFields.java
@@ -151,7 +151,7 @@ public class SearchFields {
     /*
      * (tabular) Data Tags are indexed as a string, since we are only planning to
      * use these in facet-like, exact searches:
-     */ 
+     */
     public static final String TABDATA_TAG = "tabularDataTag";
 
     public static final String ACCESS = "fileAccess";
@@ -195,10 +195,21 @@ public class SearchFields {
     public static final String PARENT_NAME = "parentName";
     public static final String PARENT_ID = "parentId";
     public static final String PARENT_IDENTIFIER = "parentIdentifier";
+    /**
+     * @todo Should we add a "parentCitationHtml" field now or wait for demand
+     * for it?
+     */
     public static final String PARENT_CITATION = "parentCitation";
 
     public static final String DATASET_DESCRIPTION = "dsDescriptionValue";
+    /**
+     * In Dataverse 4.3 and earlier "citation" was indexed as the "online" or
+     * HTML version, with the DOI link wrapped in an href tag but now it's the
+     * plaintext version and anyone who was depending on the old version can
+     * switch to the new "citationHtml" field.
+ */ public static final String DATASET_CITATION = "citation"; + public static final String DATASET_CITATION_HTML = "citationHtml"; public static final String DATASET_DEACCESSION_REASON = "deaccessionReason"; /** * In contrast to PUBLICATION_DATE, this field applies only to datasets for diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java index 94a28859f18..eaa6e4e2034 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SearchServiceBean.java @@ -371,6 +371,7 @@ public SolrQueryResponse search(User user, Dataverse dataverse, String query, Li logger.fine("score for " + id + ": " + score); String identifier = (String) solrDocument.getFieldValue(SearchFields.IDENTIFIER); String citation = (String) solrDocument.getFieldValue(SearchFields.DATASET_CITATION); + String citationPlainHtml = (String) solrDocument.getFieldValue(SearchFields.DATASET_CITATION_HTML); String persistentUrl = (String) solrDocument.getFieldValue(SearchFields.PERSISTENT_URL); String name = (String) solrDocument.getFieldValue(SearchFields.NAME); String nameSort = (String) solrDocument.getFieldValue(SearchFields.NAME_SORT); @@ -475,6 +476,7 @@ public SolrQueryResponse search(User user, Dataverse dataverse, String query, Li solrSearchResult.setDatasetVersionId(datasetVersionId); solrSearchResult.setCitation(citation); + solrSearchResult.setCitationHtml(citationPlainHtml); if (title != null) { // solrSearchResult.setTitle((String) titles.get(0)); solrSearchResult.setTitle((String) title); diff --git a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java index b58c0041652..fa738f0ea6d 100644 --- a/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java +++ b/src/main/java/edu/harvard/iq/dataverse/search/SolrSearchResult.java @@ -59,6 +59,7 @@ public class SolrSearchResult { private Map parent; private String dataverseAffiliation; private String citation; + private String citationHtml; /** * Files and datasets might have a UNF. Dataverses don't. */ @@ -497,6 +498,7 @@ public JsonObjectBuilder json(boolean showRelevance, boolean showEntityIds, bool .add("unf", getUnf()) .add("dataset_citation", datasetCitation) .add("deaccession_reason", this.deaccessionReason) + .add("citationHtml", this.citationHtml) .add("citation", this.citation); // Now that nullSafeJsonBuilder has been instatiated, check for null before adding to it! 
if (showRelevance) { @@ -764,6 +766,14 @@ public void setCitation(String citation) { this.citation = citation; } + public String getCitationHtml() { + return citationHtml; + } + + public void setCitationHtml(String citationHtml) { + this.citationHtml = citationHtml; + } + public String getFiletype() { return filetype; } diff --git a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java index 284dd8ff7d3..cc513283cdc 100644 --- a/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java +++ b/src/test/java/edu/harvard/iq/dataverse/api/SearchIT.java @@ -38,8 +38,10 @@ import static com.jayway.restassured.RestAssured.given; import static com.jayway.restassured.path.json.JsonPath.with; import static com.jayway.restassured.path.xml.XmlPath.from; -import static java.lang.Thread.sleep; import static junit.framework.Assert.assertEquals; +import static java.lang.Thread.sleep; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; public class SearchIT { @@ -117,6 +119,49 @@ public static void setUpClass() { } + @Test + public void testSearchCitation() { + + Response createUser = UtilIT.createRandomUser(); + createUser.prettyPrint(); + String username = UtilIT.getUsernameFromResponse(createUser); + String apiToken = UtilIT.getApiTokenFromResponse(createUser); + + Response createDataverseResponse = UtilIT.createRandomDataverse(apiToken); + createDataverseResponse.prettyPrint(); + String dataverseAlias = UtilIT.getAliasFromResponse(createDataverseResponse); + + Response createDatasetResponse = UtilIT.createRandomDatasetViaNativeApi(dataverseAlias, apiToken); + createDatasetResponse.prettyPrint(); + Integer datasetId = UtilIT.getDatasetIdFromResponse(createDatasetResponse); + Response solrResponse = querySolr("id:dataset_" + datasetId + "_draft"); + solrResponse.prettyPrint(); + Response enableNonPublicSearch = enableSetting(SettingsServiceBean.Key.SearchApiNonPublicAllowed); + assertEquals(200, enableNonPublicSearch.getStatusCode()); + Response searchResponse = search("id:dataset_" + datasetId + "_draft", apiToken); + searchResponse.prettyPrint(); + assertFalse(searchResponse.body().jsonPath().getString("data.items[0].citation").contains("href")); + assertTrue(searchResponse.body().jsonPath().getString("data.items[0].citationHtml").contains("href")); + + Response deleteDatasetResponse = UtilIT.deleteDatasetViaNativeApi(datasetId, apiToken); + deleteDatasetResponse.prettyPrint(); + assertEquals(200, deleteDatasetResponse.getStatusCode()); + + Response deleteDataverseResponse = UtilIT.deleteDataverse(dataverseAlias, apiToken); + deleteDataverseResponse.prettyPrint(); + assertEquals(200, deleteDataverseResponse.getStatusCode()); + + makeSuperuser(username); + search("finch&show_relevance=true&show_facets=true&fq=publicationDate:2016&subtree=birds", apiToken).prettyPrint(); + + search("trees", apiToken).prettyPrint(); + + Response deleteUserResponse = UtilIT.deleteUser(username); + deleteUserResponse.prettyPrint(); + assertEquals(200, deleteUserResponse.getStatusCode()); + + } + @Test public void homerGivesNedPermissionAtRoot() { @@ -898,6 +943,12 @@ private Response search(TestSearchQuery query, TestUser user) { ); } + static Response search(String query, String apiToken) { + return given() + .header(keyString, apiToken) + .get("/api/search?q=" + query); + } + private Response uploadZipFile(String persistentId, String zipFileName, String apiToken) throws FileNotFoundException { String pathToFileName = 
"scripts/search/data/binary/" + zipFileName; Path path = Paths.get(pathToFileName); From 70c355d54c6f985d396f09af89da6ea41685ed64 Mon Sep 17 00:00:00 2001 From: Gustavo Durand Date: Tue, 10 May 2016 13:41:16 -0400 Subject: [PATCH 25/37] some cleanup in DataCitation code --- .../harvard/iq/dataverse/DataCitation.java | 32 +++++++++---------- 1 file changed, 15 insertions(+), 17 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java index 8f68ffbf0cb..4ce1b3acc9b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataCitation.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataCitation.java @@ -5,12 +5,14 @@ */ package edu.harvard.iq.dataverse; +import edu.emory.mathcs.backport.java.util.Arrays; import java.net.URL; import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Date; import java.util.List; +import java.util.stream.Collectors; import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.StringUtils; @@ -167,17 +169,19 @@ public String toString() { } public String toString(boolean html) { - + // first add comma separated parts List citationList = new ArrayList<>(); - - // first add comma separated parts - addNonEmptyStringToList(citationList, formatString(authors, html)); - addNonEmptyStringToList(citationList, formatNonEmptyDate(citationDate,"yyyy")); - addNonEmptyStringToList(citationList, formatString(title, html, "\"")); - addNonEmptyStringToList(citationList, formatURL(persistentId.toURL(), html)); - addNonEmptyStringToList(citationList, formatString(distributors, html)); - addNonEmptyStringToList(citationList, version); - StringBuilder citation = new StringBuilder(StringUtils.join(citationList, ", ")); + citationList.add(formatString(authors, html)); + citationList.add(formatNonEmptyDate(citationDate,"yyyy")); + citationList.add(formatString(title, html, "\"")); + citationList.add(formatURL(persistentId.toURL(), html)); + citationList.add(formatString(distributors, html)); + citationList.add(version); + + StringBuilder citation = new StringBuilder( + citationList.stream() + .filter( value -> !StringUtils.isEmpty(value) ) + .collect(Collectors.joining(", "))); // append UNF if (!StringUtils.isEmpty(UNF)) { @@ -188,13 +192,7 @@ public String toString(boolean html) { } - // helper methods - private void addNonEmptyStringToList(List list, String value) { - if (!StringUtils.isEmpty(value)) { - list.add(value); - } - } - + // helper methods private String formatString(String value, boolean escapeHtml) { return formatString(value, escapeHtml, ""); } From 74a8d56b598299407a24eb15284386f559f856fa Mon Sep 17 00:00:00 2001 From: Gustavo Durand Date: Tue, 10 May 2016 13:58:00 -0400 Subject: [PATCH 26/37] search include fragment now shows new citationHtml --- src/main/webapp/search-include-fragment.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/search-include-fragment.xhtml b/src/main/webapp/search-include-fragment.xhtml index 58a1cd26e0b..485d95e1650 100644 --- a/src/main/webapp/search-include-fragment.xhtml +++ b/src/main/webapp/search-include-fragment.xhtml @@ -516,7 +516,7 @@ - + Date: Tue, 10 May 2016 14:55:30 -0400 Subject: [PATCH 27/37] my Data now shows new citationHtml --- src/main/webapp/mydata_templates/cards_minimum.html | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/main/webapp/mydata_templates/cards_minimum.html 
b/src/main/webapp/mydata_templates/cards_minimum.html index 77059527ba0..6fa4066d48b 100644 --- a/src/main/webapp/mydata_templates/cards_minimum.html +++ b/src/main/webapp/mydata_templates/cards_minimum.html @@ -62,7 +62,8 @@ {{ card_info.date_to_display_on_card }} - {{ card_info.parentName }} Dataverse - {{ card_info.citation|safe }} + + {%if card_info.citationHtml %}{{ card_info.citationHtml|safe }}{% else %}{{ card_info.citation|safe }}{% endif %} {% elif card_info.type == "file" %}
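The patches above (22 through 27) move all citation rendering into the new DataCitation class and expose both a plaintext and an HTML flavor of the result. A minimal sketch of how the class is meant to be called, using only the constructor and toString(boolean) introduced in patch 22; the "doi:..." string accepted by GlobalId is an illustrative assumption, not something these patches establish:

    // Sketch only: exercises the DataCitation API added in PATCH 22/37.
    import edu.harvard.iq.dataverse.DataCitation;
    import edu.harvard.iq.dataverse.GlobalId;
    import java.util.Date;

    public class DataCitationSketch {
        public static void main(String[] args) {
            DataCitation citation = new DataCitation(
                    "Finch, Fiona",                          // authors
                    "Darwin's <b>Finches</b>",               // title; markup must come out escaped in HTML mode
                    new Date(),                              // citation date
                    new GlobalId("doi:10.5072/FK2/G2VPE7"),  // persistent id (string format assumed)
                    "V1",                                    // version
                    null,                                    // UNF; empty parts are skipped by toString()
                    "Root Dataverse");                       // distributors

            // plaintext form, as now indexed into the "citation" Solr field
            System.out.println(citation.toString(false));

            // HTML form: values pass through StringEscapeUtils.escapeHtml and the
            // persistent URL is wrapped in an anchor, as indexed into "citationHtml"
            System.out.println(citation.toString(true));
        }
    }

Parts that come back null or empty, such as the UNF here, are dropped before the comma join, so the same toString() produces a well-formed citation whichever fields happen to be populated.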
From 8651ee835469d0c35feff9b1b8a6ed77f2c60b0f Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 10 May 2016 17:08:12 -0400 Subject: [PATCH 28/37] Add Markup Checker to various fields --- src/main/webapp/dataset-license-terms.xhtml | 6 ++++-- src/main/webapp/dataset.xhtml | 8 ++++---- src/main/webapp/manage-guestbooks.xhtml | 10 +++++++--- 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml index f1f462a0294..b903a04fe66 100644 --- a/src/main/webapp/dataset-license-terms.xhtml +++ b/src/main/webapp/dataset-license-terms.xhtml @@ -562,10 +562,12 @@
- + +
- + +
diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 6f8dd18d663..572cf4b944a 100755 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -15,6 +15,7 @@ + @@ -374,7 +375,7 @@ #{DatasetPage.datasetVersionUI.datasetRelPublications.get(0).title}
@@ -383,7 +384,6 @@ #{DatasetPage.datasetVersionUI.notes.datasetFieldType.title}
-
@@ -1324,7 +1324,7 @@

- +

#{bundle['dataset.publishBoth.tip']} @@ -1340,7 +1340,7 @@ #{bundle['dataset.republish.tip']}

- +

diff --git a/src/main/webapp/manage-guestbooks.xhtml b/src/main/webapp/manage-guestbooks.xhtml index 3348ecdf55f..54e4f35d3dd 100644 --- a/src/main/webapp/manage-guestbooks.xhtml +++ b/src/main/webapp/manage-guestbooks.xhtml @@ -4,7 +4,8 @@ xmlns:f="http://java.sun.com/jsf/core" xmlns:ui="http://java.sun.com/jsf/facelets" xmlns:p="http://primefaces.org/ui" - xmlns:jsf="http://xmlns.jcp.org/jsf"> + xmlns:jsf="http://xmlns.jcp.org/jsf" + xmlns:o="http://omnifaces.org/ui"> @@ -18,6 +19,7 @@ +

@@ -182,12 +184,14 @@

- + +

- + +

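Patch 28 applies one pattern throughout: user-supplied rich-text fields (terms of use, guestbook questions, related publication citations) are run through the MarkupChecker utility before they are rendered. A hedged server-side sketch of that idea; the static method name sanitizeBasicHTML is an assumption based on the edu.harvard.iq.dataverse.util.MarkupChecker class that DatasetVersion imports elsewhere in this series:

    // Sketch only: the exact MarkupChecker API is assumed, not shown in these patches.
    import edu.harvard.iq.dataverse.util.MarkupChecker;

    public class RichTextSanitizerSketch {

        // returns markup that is safe to render unescaped in a JSF page
        public static String toDisplayHtml(String userSuppliedText) {
            if (userSuppliedText == null) {
                return null;
            }
            // keeps basic formatting tags; script elements and event-handler
            // attributes are removed by the sanitizer
            return MarkupChecker.sanitizeBasicHTML(userSuppliedText);
        }

        public static void main(String[] args) {
            // the <script> element should be stripped, the <b> element kept
            System.out.println(toDisplayHtml("<b>Terms</b> <script>alert(1)</script>"));
        }
    }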
From a7fdea3959b03656f0176c5b59adf9f1e0a936b1 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Tue, 10 May 2016 17:17:09 -0400 Subject: [PATCH 29/37] fix import --- src/main/webapp/dataset.xhtml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/webapp/dataset.xhtml b/src/main/webapp/dataset.xhtml index 572cf4b944a..733bec67f58 100755 --- a/src/main/webapp/dataset.xhtml +++ b/src/main/webapp/dataset.xhtml @@ -15,7 +15,6 @@ - @@ -24,6 +23,7 @@ + From bd9c1a16237d758250c7332da2703552d6382290 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 11 May 2016 08:47:38 -0400 Subject: [PATCH 30/37] Fix DV tag line and Permissions Success Message --- .../edu/harvard/iq/dataverse/ManagePermissionsPage.java | 3 ++- .../edu/harvard/iq/dataverse/RolePermissionFragment.java | 3 ++- src/main/webapp/dataverse_header.xhtml | 8 +++++--- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java b/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java index 36902669816..b0125cce040 100644 --- a/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java +++ b/src/main/java/edu/harvard/iq/dataverse/ManagePermissionsPage.java @@ -38,6 +38,7 @@ import javax.inject.Named; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; +import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.StringUtils; /** @@ -396,7 +397,7 @@ private void notifyRoleChange(RoleAssignee ra, UserNotification.Type type) { private void assignRole(RoleAssignee ra, DataverseRole r) { try { commandEngine.submit(new AssignRoleCommand(ra, r, dvObject, dvRequestService.getDataverseRequest())); - JsfHelper.addSuccessMessage(r.getName() + " role assigned to " + ra.getDisplayInfo().getTitle() + " for " + dvObject.getDisplayName() + "."); + JsfHelper.addSuccessMessage(r.getName() + " role assigned to " + ra.getDisplayInfo().getTitle() + " for " + StringEscapeUtils.escapeHtml(dvObject.getDisplayName()) + "."); // don't notify if role = file downloader and object is not released if (!(r.getAlias().equals(DataverseRole.FILE_DOWNLOADER) && !dvObject.isReleased()) ){ notifyRoleChange(ra, UserNotification.Type.ASSIGNROLE); diff --git a/src/main/java/edu/harvard/iq/dataverse/RolePermissionFragment.java b/src/main/java/edu/harvard/iq/dataverse/RolePermissionFragment.java index c420c4692f2..99e7cded743 100644 --- a/src/main/java/edu/harvard/iq/dataverse/RolePermissionFragment.java +++ b/src/main/java/edu/harvard/iq/dataverse/RolePermissionFragment.java @@ -34,6 +34,7 @@ import javax.inject.Named; import javax.persistence.EntityManager; import javax.persistence.PersistenceContext; +import org.apache.commons.lang.StringEscapeUtils; import org.apache.commons.lang.StringUtils; /** @@ -187,7 +188,7 @@ public void assignRole(ActionEvent evt) { private void assignRole(RoleAssignee ra, DataverseRole r) { try { commandEngine.submit(new AssignRoleCommand(ra, r, dvObject, dvRequestService.getDataverseRequest())); - JH.addMessage(FacesMessage.SEVERITY_INFO, "Role " + r.getName() + " assigned to " + ra.getDisplayInfo().getTitle() + " on " + dvObject.getDisplayName()); + JH.addMessage(FacesMessage.SEVERITY_INFO, "Role " + r.getName() + " assigned to " + ra.getDisplayInfo().getTitle() + " on " + StringEscapeUtils.escapeHtml(dvObject.getDisplayName())); } catch (CommandException ex) { JH.addMessage(FacesMessage.SEVERITY_ERROR, "Can't assign role: " + ex.getMessage()); } diff --git 
a/src/main/webapp/dataverse_header.xhtml b/src/main/webapp/dataverse_header.xhtml index e38a66e4c60..2ba378390df 100644 --- a/src/main/webapp/dataverse_header.xhtml +++ b/src/main/webapp/dataverse_header.xhtml @@ -5,7 +5,8 @@ xmlns:p="http://primefaces.org/ui" xmlns:pt="http://xmlns.jcp.org/jsf/passthrough" xmlns:jsf="http://xmlns.jcp.org/jsf" - xmlns:iqbs="http://xmlns.jcp.org/jsf/composite/iqbs"> + xmlns:iqbs="http://xmlns.jcp.org/jsf/composite/iqbs" + xmlns:o="http://omnifaces.org/ui">
@@ -164,6 +165,7 @@
+
- +
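The escapeHtml calls added in patch 30 neutralize markup in an object's display name before it is interpolated into a JSF growl message. A self-contained illustration of the same commons-lang 2.x call; the role and user names here are made up:

    import org.apache.commons.lang.StringEscapeUtils;

    public class EscapeHtmlDemo {
        public static void main(String[] args) {
            String displayName = "<script>alert(1)</script> My Dataverse";
            // same pattern as ManagePermissionsPage and RolePermissionFragment above
            String message = "Curator role assigned to Jane Doe for "
                    + StringEscapeUtils.escapeHtml(displayName) + ".";
            // the angle brackets are emitted as &lt; and &gt;, so a success
            // message rendered as HTML displays the name instead of executing it
            System.out.println(message);
        }
    }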
From 4181948e290014b390ee44ccb2e7d4fefdfb3f63 Mon Sep 17 00:00:00 2001 From: Stephen Kraffmiller Date: Wed, 11 May 2016 12:04:11 -0400 Subject: [PATCH 31/37] Escape GB Custom questions and DV Tagline --- src/main/webapp/dataset-license-terms.xhtml | 5 +++-- src/main/webapp/dataverse_header.xhtml | 6 +++--- src/main/webapp/manage-guestbooks.xhtml | 6 +++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/main/webapp/dataset-license-terms.xhtml b/src/main/webapp/dataset-license-terms.xhtml index b903a04fe66..0b55553d282 100644 --- a/src/main/webapp/dataset-license-terms.xhtml +++ b/src/main/webapp/dataset-license-terms.xhtml @@ -24,6 +24,7 @@
+
- +
- +
diff --git a/src/main/webapp/dataverse_header.xhtml b/src/main/webapp/dataverse_header.xhtml index 2ba378390df..3a6f2acacca 100644 --- a/src/main/webapp/dataverse_header.xhtml +++ b/src/main/webapp/dataverse_header.xhtml @@ -165,7 +165,7 @@
- +
- +
diff --git a/src/main/webapp/manage-guestbooks.xhtml b/src/main/webapp/manage-guestbooks.xhtml index 54e4f35d3dd..e71a26174e2 100644 --- a/src/main/webapp/manage-guestbooks.xhtml +++ b/src/main/webapp/manage-guestbooks.xhtml @@ -19,7 +19,7 @@ - +
@@ -185,13 +185,13 @@

- +

- +

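The next patch adds a CORS header to the Search API by writing directly to the injected HttpServletResponse of that one endpoint. For comparison, a hypothetical JAX-RS 2.x response filter (not part of this series) that would attach the same header to every API response:

    import java.io.IOException;
    import javax.ws.rs.container.ContainerRequestContext;
    import javax.ws.rs.container.ContainerResponseContext;
    import javax.ws.rs.container.ContainerResponseFilter;
    import javax.ws.rs.ext.Provider;

    // Hypothetical alternative: registered automatically via @Provider,
    // instead of setting the header inside a single resource method.
    @Provider
    public class CorsFilterSketch implements ContainerResponseFilter {
        @Override
        public void filter(ContainerRequestContext request, ContainerResponseContext response) throws IOException {
            // allow any origin to read API responses, matching the header
            // the following patch sets on /api/search
            response.getHeaders().add("Access-Control-Allow-Origin", "*");
        }
    }

The per-endpoint approach taken in the patch is the smaller change; a filter like the one above trades that precision for uniform behavior across the whole API.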
From 43e21937ba546aa647622eadb5f2530d28437f78 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 11 May 2016 15:05:43 -0400 Subject: [PATCH 32/37] Committing a quick CORS header fix into the patch branch. Will update #1136 to reflect this. --- src/main/java/edu/harvard/iq/dataverse/api/Search.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Search.java b/src/main/java/edu/harvard/iq/dataverse/api/Search.java index 46f94e1208c..c65084cedb7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Search.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Search.java @@ -27,9 +27,11 @@ import javax.json.Json; import javax.json.JsonArrayBuilder; import javax.json.JsonObjectBuilder; +import javax.servlet.http.HttpServletResponse; import javax.ws.rs.GET; import javax.ws.rs.Path; import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Context; import javax.ws.rs.core.Response; import org.apache.commons.lang.StringUtils; @@ -65,7 +67,8 @@ public Response search( @QueryParam("fq") final List filterQueries, @QueryParam("show_entity_ids") boolean showEntityIds, @QueryParam("show_api_urls") boolean showApiUrls, - @QueryParam("show_my_data") boolean showMyData + @QueryParam("show_my_data") boolean showMyData, + @Context HttpServletResponse response ) { User user; @@ -182,6 +185,7 @@ public Response search( */ return errorResponse(Response.Status.BAD_REQUEST, solrQueryResponse.getError()); } + response.setHeader("Access-Control-Allow-Origin", "*"); return okResponse(value); } else { return errorResponse(Response.Status.BAD_REQUEST, "q parameter is missing"); From dde14c5fc27618ca3ad61cdaf6db3068cdc0283f Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 11 May 2016 17:51:53 -0400 Subject: [PATCH 33/37] Revert "resolving a (weird?) merge conflict on the branch?" This reverts commit 815ef304b246cab3727b326608b5f86dd6922461, reversing changes made to 4181948e290014b390ee44ccb2e7d4fefdfb3f63. 
--- pom.xml | 26 +- .../edu/harvard/iq/dataverse/Dataset.java | 29 +- .../harvard/iq/dataverse/DatasetVersion.java | 1 - .../edu/harvard/iq/dataverse/Dataverse.java | 13 +- .../iq/dataverse/DataverseServiceBean.java | 8 +- .../dataverse/HarvestingDataverseConfig.java | 167 ++++++ .../iq/dataverse/api/BatchServiceBean.java | 2 +- .../harvard/iq/dataverse/api/Datasets.java | 5 +- .../harvard/iq/dataverse/api/Dataverses.java | 3 - .../harvard/iq/dataverse/api/Harvesting.java | 305 ---------- .../edu/harvard/iq/dataverse/api/Search.java | 6 +- .../api/imports/ImportServiceBean.java | 105 ---- .../impl/CreateHarvestingClientCommand.java | 37 -- .../impl/DeleteHarvestingClientCommand.java | 39 -- .../impl/GetHarvestingClientCommand.java | 51 -- .../impl/UpdateHarvestingClientCommand.java | 36 -- .../harvest/client/ClientHarvestRun.java | 186 ------- .../harvest/client/FastGetRecord.java | 520 ------------------ .../harvest/client/HarvestTimerInfo.java | 97 ---- .../harvest/client/HarvesterServiceBean.java | 431 --------------- .../harvest/client/HarvestingClient.java | 486 ---------------- .../client/HarvestingClientServiceBean.java | 143 ----- .../timer/DataverseTimerServiceBean.java | 252 --------- .../iq/dataverse/util/json/JsonParser.java | 17 +- 24 files changed, 194 insertions(+), 2771 deletions(-) create mode 100644 src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateHarvestingClientCommand.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteHarvestingClientCommand.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetHarvestingClientCommand.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestingClientCommand.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/ClientHarvestRun.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestTimerInfo.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java delete mode 100644 src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java diff --git a/pom.xml b/pom.xml index 862d8fffe48..e15f5a87529 100644 --- a/pom.xml +++ b/pom.xml @@ -39,7 +39,7 @@ Local repository for hosting jars not available from network repositories. 
file://${project.basedir}/local_lib - + dataone.org http://dev-testing.dataone.org/maven @@ -49,7 +49,6 @@ true - @@ -369,28 +368,7 @@ log4j 1.2.17 - - - - - - org.dspace - oclc-harvester2 - 0.1.12 - - - - - com.lyncode - xoai-common - 4.1.0 - - - com.lyncode - xoai-service-provider - 4.1.0 - - + diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataset.java b/src/main/java/edu/harvard/iq/dataverse/Dataset.java index 3c0fd8e70d8..6b019ab9a4a 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataset.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataset.java @@ -1,6 +1,5 @@ package edu.harvard.iq.dataverse; -import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import java.nio.file.Path; import java.nio.file.Paths; import java.text.SimpleDateFormat; @@ -522,24 +521,24 @@ public boolean isHarvested() { public String getRemoteArchiveURL() { if (isHarvested()) { - if (HarvestingClient.HARVEST_STYLE_DATAVERSE.equals(this.getOwner().getHarvestingClientConfig().getHarvestStyle())) { - return this.getOwner().getHarvestingClientConfig().getArchiveUrl() + "/dataset.xhtml?persistentId=" + getGlobalId(); - } else if (HarvestingClient.HARVEST_STYLE_VDC.equals(this.getOwner().getHarvestingClientConfig().getHarvestStyle())) { - String rootArchiveUrl = this.getOwner().getHarvestingClientConfig().getHarvestingUrl(); + if (HarvestingDataverseConfig.HARVEST_STYLE_DATAVERSE.equals(this.getOwner().getHarvestingDataverseConfig().getHarvestStyle())) { + return this.getOwner().getHarvestingDataverseConfig().getArchiveUrl() + "/dataset.xhtml?persistentId=" + getGlobalId(); + } else if (HarvestingDataverseConfig.HARVEST_STYLE_VDC.equals(this.getOwner().getHarvestingDataverseConfig().getHarvestStyle())) { + String rootArchiveUrl = this.getOwner().getHarvestingDataverseConfig().getHarvestingUrl(); int c = rootArchiveUrl.indexOf("/OAIHandler"); if (c > 0) { rootArchiveUrl = rootArchiveUrl.substring(0, c); return rootArchiveUrl + "/faces/study/StudyPage.xhtml?globalId=" + getGlobalId(); } - } else if (HarvestingClient.HARVEST_STYLE_ICPSR.equals(this.getOwner().getHarvestingClientConfig().getHarvestStyle())) { + } else if (HarvestingDataverseConfig.HARVEST_STYLE_ICPSR.equals(this.getOwner().getHarvestingDataverseConfig().getHarvestStyle())) { // For the ICPSR, it turns out that the best thing to do is to // rely on the DOI to send the user to the right landing page for // the study: //String icpsrId = identifier; - //return this.getOwner().getHarvestingClientConfig().getArchiveUrl() + "/icpsrweb/ICPSR/studies/"+icpsrId+"?q="+icpsrId+"&searchSource=icpsr-landing"; + //return this.getOwner().getHarvestingDataverseConfig().getArchiveUrl() + "/icpsrweb/ICPSR/studies/"+icpsrId+"?q="+icpsrId+"&searchSource=icpsr-landing"; return "http://doi.org/" + authority + "/" + identifier; - } else if (HarvestingClient.HARVEST_STYLE_NESSTAR.equals(this.getOwner().getHarvestingClientConfig().getHarvestStyle())) { - String nServerURL = this.getOwner().getHarvestingClientConfig().getArchiveUrl(); + } else if (HarvestingDataverseConfig.HARVEST_STYLE_NESSTAR.equals(this.getOwner().getHarvestingDataverseConfig().getHarvestStyle())) { + String nServerURL = this.getOwner().getHarvestingDataverseConfig().getArchiveUrl(); // chop any trailing slashes in the server URL - or they will result // in multiple slashes in the final URL pointing to the study // on server of origin; Nesstar doesn't like it, apparently. 
@@ -557,9 +556,9 @@ public String getRemoteArchiveURL() { + "&top=yes"; return NesstarWebviewPage; - } else if (HarvestingClient.HARVEST_STYLE_ROPER.equals(this.getOwner().getHarvestingClientConfig().getHarvestStyle())) { - return this.getOwner().getHarvestingClientConfig().getArchiveUrl() + "/CFIDE/cf/action/catalog/abstract.cfm?archno=" + identifier; - } else if (HarvestingClient.HARVEST_STYLE_HGL.equals(this.getOwner().getHarvestingClientConfig().getHarvestStyle())) { + } else if (HarvestingDataverseConfig.HARVEST_STYLE_ROPER.equals(this.getOwner().getHarvestingDataverseConfig().getHarvestStyle())) { + return this.getOwner().getHarvestingDataverseConfig().getArchiveUrl() + "/CFIDE/cf/action/catalog/abstract.cfm?archno=" + identifier; + } else if (HarvestingDataverseConfig.HARVEST_STYLE_HGL.equals(this.getOwner().getHarvestingDataverseConfig().getHarvestStyle())) { // a bit of a hack, true. // HGL documents, when turned into Dataverse studies/datasets // all 1 datafile; the location ("storage identifier") of the file @@ -575,9 +574,9 @@ public String getRemoteArchiveURL() { } } } - return this.getOwner().getHarvestingClientConfig().getArchiveUrl(); + return this.getOwner().getHarvestingDataverseConfig().getArchiveUrl(); }else { - return this.getOwner().getHarvestingClientConfig().getArchiveUrl(); + return this.getOwner().getHarvestingDataverseConfig().getArchiveUrl(); } } @@ -586,7 +585,7 @@ public String getRemoteArchiveURL() { public String getHarvestingDescription() { if (isHarvested()) { - return this.getOwner().getHarvestingClientConfig().getArchiveDescription(); + return this.getOwner().getHarvestingDataverseConfig().getArchiveDescription(); } return null; diff --git a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java index e5785cbfad1..469375828c9 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java +++ b/src/main/java/edu/harvard/iq/dataverse/DatasetVersion.java @@ -1,6 +1,5 @@ package edu.harvard.iq.dataverse; -import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.util.MarkupChecker; import edu.harvard.iq.dataverse.DatasetFieldType.FieldType; import edu.harvard.iq.dataverse.util.StringUtil; diff --git a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java index c3580f807fe..b97d3402f81 100644 --- a/src/main/java/edu/harvard/iq/dataverse/Dataverse.java +++ b/src/main/java/edu/harvard/iq/dataverse/Dataverse.java @@ -1,6 +1,5 @@ package edu.harvard.iq.dataverse; -import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.authorization.DataverseRole; import edu.harvard.iq.dataverse.search.savedsearch.SavedSearch; import java.util.ArrayList; @@ -278,18 +277,18 @@ public void setGuestbooks(List guestbooks) { } @OneToOne (mappedBy="dataverse", cascade={CascadeType.PERSIST, CascadeType.REMOVE}) - private HarvestingClient harvestingClient; + private HarvestingDataverseConfig harvestingDataverseConfig; - public HarvestingClient getHarvestingClientConfig() { - return this.harvestingClient; + public HarvestingDataverseConfig getHarvestingDataverseConfig() { + return this.harvestingDataverseConfig; } - public void setHarvestingClientConfig(HarvestingClient harvestingClient) { - this.harvestingClient = harvestingClient; + public void setHarvestingDataverseConfig(HarvestingDataverseConfig harvestingDataverseConfig) { + this.harvestingDataverseConfig = 
harvestingDataverseConfig; } public boolean isHarvested() { - return harvestingClient != null; + return harvestingDataverseConfig != null; } diff --git a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java index 34cb48e9f68..b5f5d78f716 100644 --- a/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/DataverseServiceBean.java @@ -30,8 +30,6 @@ import java.util.jar.Manifest; import javax.ejb.EJB; import javax.ejb.Stateless; -import javax.ejb.TransactionAttribute; -import javax.ejb.TransactionAttributeType; import javax.inject.Inject; import javax.inject.Named; import javax.persistence.EntityManager; @@ -419,7 +417,7 @@ public List findDataversesThatLinkToThisDatasetId(long datasetId) { */ public Map getAllHarvestedDataverseDescriptions(){ - String qstr = "SELECT dataverse_id, archiveDescription FROM harvestingClient;"; + String qstr = "SELECT dataverse_id, archiveDescription FROM harvestingDataverseConfig;"; List searchResults = null; try { @@ -452,10 +450,6 @@ public Map getAllHarvestedDataverseDescriptions(){ return ret; } - - public List getAllHarvestedDataverses() { - return em.createQuery("SELECT object(d) FROM Dataverse d, harvestingClient c AS d WHERE c.dataverse.id=d.id order by d.id").getResultList(); - } public void populateDvSearchCard(SolrSearchResult solrSearchResult) { diff --git a/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java b/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java new file mode 100644 index 00000000000..28df6e19e65 --- /dev/null +++ b/src/main/java/edu/harvard/iq/dataverse/HarvestingDataverseConfig.java @@ -0,0 +1,167 @@ +/* + * To change this license header, choose License Headers in Project Properties. + * To change this template file, choose Tools | Templates + * and open the template in the editor. 
+ */ +package edu.harvard.iq.dataverse; + +import java.io.Serializable; +import javax.persistence.CascadeType; +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import javax.persistence.Index; +import javax.persistence.JoinColumn; +import javax.persistence.OneToOne; +import javax.persistence.Table; + +/** + * + * @author Leonid Andreev + */ +@Entity +@Table(indexes = {@Index(columnList="dataverse_id") + , @Index(columnList="harvesttype") + , @Index(columnList="harveststyle") + , @Index(columnList="harvestingurl")}) +public class HarvestingDataverseConfig implements Serializable { + private static final long serialVersionUID = 1L; + @Id + @GeneratedValue(strategy = GenerationType.AUTO) + private Long id; + + public Long getId() { + return id; + } + + public void setId(Long id) { + this.id = id; + } + + public static final String HARVEST_TYPE_OAI="oai"; + public static final String HARVEST_TYPE_NESSTAR="nesstar"; + + public static final String HARVEST_STYLE_DATAVERSE="dataverse"; + // pre-4.0 remote Dataverse: + public static final String HARVEST_STYLE_VDC="vdc"; + public static final String HARVEST_STYLE_ICPSR="icpsr"; + public static final String HARVEST_STYLE_NESSTAR="nesstar"; + public static final String HARVEST_STYLE_ROPER="roper"; + public static final String HARVEST_STYLE_HGL="hgl"; + public static final String HARVEST_STYLE_DEFAULT="default"; + + public static final String REMOTE_ARCHIVE_URL_LEVEL_DATAVERSE="dataverse"; + public static final String REMOTE_ARCHIVE_URL_LEVEL_DATASET="dataset"; + public static final String REMOTE_ARCHIVE_URL_LEVEL_FILE="file"; + + public HarvestingDataverseConfig() { + this.harvestType = HARVEST_TYPE_OAI; // default harvestType + this.harvestStyle = HARVEST_STYLE_DATAVERSE; // default harvestStyle + } + + + @OneToOne (cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST }) + @JoinColumn(name="dataverse_id") + private Dataverse dataverse; + + public Dataverse getDataverse() { + return this.dataverse; + } + + public void setDataverse(Dataverse dataverse) { + this.dataverse = dataverse; + } + + String harvestType; + + public String getHarvestType() { + return harvestType; + } + + public void setHarvestType(String harvestType) { + this.harvestType = harvestType; + } + + String harvestStyle; + + public String getHarvestStyle() { + return harvestStyle; + } + + public void setHarvestStyle(String harvestStyle) { + this.harvestStyle = harvestStyle; + } + + private String harvestingUrl; + + public String getHarvestingUrl() { + return this.harvestingUrl; + } + + public void setHarvestingUrl(String harvestingUrl) { + this.harvestingUrl = harvestingUrl.trim(); + } + + private String archiveUrl; + + public String getArchiveUrl() { + return this.archiveUrl; + } + + public void setArchiveUrl(String archiveUrl) { + this.archiveUrl = archiveUrl; + } + + @Column(columnDefinition="TEXT") + private String archiveDescription; + + public String getArchiveDescription() { + return this.archiveDescription; + } + + public void setArchiveDescription(String archiveDescription) { + this.archiveDescription = archiveDescription; + } + + private String harvestingSet; + + public String getHarvestingSet() { + return this.harvestingSet; + } + + public void setHarvestingSet(String harvestingSet) { + this.harvestingSet = harvestingSet; + } + + + + + + @Override + public int hashCode() { + int hash = 0; + hash += (id != null ? 
id.hashCode() : 0); + return hash; + } + + @Override + public boolean equals(Object object) { + // TODO: Warning - this method won't work in the case the id fields are not set + if (!(object instanceof HarvestingDataverseConfig)) { + return false; + } + HarvestingDataverseConfig other = (HarvestingDataverseConfig) object; + if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { + return false; + } + return true; + } + + @Override + public String toString() { + return "edu.harvard.iq.dataverse.HarvestingDataverse[ id=" + id + " ]"; + } + +} diff --git a/src/main/java/edu/harvard/iq/dataverse/api/BatchServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/BatchServiceBean.java index d41fed2652d..7bd0635cf7f 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/BatchServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/BatchServiceBean.java @@ -5,6 +5,7 @@ import edu.harvard.iq.dataverse.DataverseServiceBean; import edu.harvard.iq.dataverse.api.imports.ImportException; import edu.harvard.iq.dataverse.api.imports.ImportUtil; +import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; import edu.harvard.iq.dataverse.engine.command.DataverseRequest; import java.io.File; import java.io.FileWriter; @@ -106,6 +107,5 @@ public JsonArrayBuilder handleDirectory(DataverseRequest dataverseRequest, File } return status; } - } diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java index 98d0af8bde3..7f5f8a9533c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Datasets.java @@ -57,9 +57,6 @@ import javax.ws.rs.QueryParam; import javax.ws.rs.core.MediaType; import javax.ws.rs.core.Response; -import static edu.harvard.iq.dataverse.api.AbstractApiBean.errorResponse; -import static edu.harvard.iq.dataverse.api.AbstractApiBean.errorResponse; -import static edu.harvard.iq.dataverse.api.AbstractApiBean.errorResponse; @Path("datasets") public class Datasets extends AbstractApiBean { @@ -315,7 +312,7 @@ public Response updateDraftVersion( String jsonBody, @PathParam("id") String id, boolean updateDraft = ds.getLatestVersion().isDraft(); DatasetVersion managedVersion = execCommand( updateDraft ? new UpdateDatasetVersionCommand(req, incomingVersion) - : new CreateDatasetVersionCommand(req, ds, incomingVersion)); + : new CreateDatasetVersionCommand(req, ds, incomingVersion)); return okResponse( json(managedVersion) ); } catch (JsonParseException ex) { diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java index ab8ce890332..a787f3e26ee 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Dataverses.java @@ -79,9 +79,6 @@ import javax.ws.rs.core.Response; import javax.ws.rs.core.Response.Status; import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; -import static edu.harvard.iq.dataverse.util.json.JsonPrinter.json; /** * A REST API for dataverses. 
diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java b/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java deleted file mode 100644 index 37d505ff2be..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/api/Harvesting.java +++ /dev/null @@ -1,305 +0,0 @@ -package edu.harvard.iq.dataverse.api; - -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; - -import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.impl.CreateHarvestingClientCommand; -import edu.harvard.iq.dataverse.engine.command.impl.GetHarvestingClientCommand; -import edu.harvard.iq.dataverse.engine.command.impl.UpdateHarvestingClientCommand; -import edu.harvard.iq.dataverse.harvest.client.ClientHarvestRun; -import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; -import edu.harvard.iq.dataverse.harvest.client.HarvestingClientServiceBean; -import edu.harvard.iq.dataverse.util.json.JsonParseException; -import javax.json.JsonObjectBuilder; -import static edu.harvard.iq.dataverse.util.json.NullSafeJsonBuilder.jsonObjectBuilder; -import java.io.IOException; -import java.io.StringReader; -import java.util.List; -import java.util.logging.Logger; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.json.Json; -import javax.json.JsonArrayBuilder; -import javax.json.JsonObject; -import javax.ws.rs.GET; -import javax.ws.rs.POST; -import javax.ws.rs.PUT; -import javax.ws.rs.Path; -import javax.ws.rs.PathParam; -import javax.ws.rs.QueryParam; -import javax.ws.rs.core.Response; - -@Stateless -@Path("harvest/client") -public class Harvesting extends AbstractApiBean { - - - @EJB - DataverseServiceBean dataverseService; - @EJB - HarvesterServiceBean harvesterService; - @EJB - HarvestingClientServiceBean harvestingClientService; - - private static final Logger logger = Logger.getLogger(Harvesting.class.getName()); - /* - * /api/harvest/client - * and - * /api/harvest/client/{nickname} - * will, by default, return a JSON record with the information about the - * configured remote archives. - * optionally, plain text output may be provided as well. - */ - @GET - @Path("") - public Response harvestingClients(@QueryParam("key") String apiKey) throws IOException { - - List harvestingClients = null; - try { - harvestingClients = harvestingClientService.getAllHarvestingClients(); - } catch (Exception ex) { - return errorResponse( Response.Status.INTERNAL_SERVER_ERROR, "Caught an exception looking up configured harvesting clients; " + ex.getMessage() ); - } - - if (harvestingClients == null) { - // returning an empty list: - return okResponse(jsonObjectBuilder().add("harvestingClients","")); - } - - JsonArrayBuilder hcArr = Json.createArrayBuilder(); - - for (HarvestingClient harvestingClient : harvestingClients) { - // We already have this harvestingClient - wny do we need to - // execute this "Get Harvesting Client Command" in order to get it, - // again? - the purpose of the command is to run the request through - // the Authorization system, to verify that they actually have - // the permission to view this harvesting client config. -- L.A. 
4.4 - HarvestingClient retrievedHarvestingClient = null; - try { - DataverseRequest req = createDataverseRequest(findUserOrDie()); - retrievedHarvestingClient = execCommand( new GetHarvestingClientCommand(req, harvestingClient)); - } catch (Exception ex) { - // Don't do anything. - // We'll just skip this one - since this means the user isn't - // authorized to view this client configuration. - } - - if (retrievedHarvestingClient != null) { - hcArr.add(harvestingConfigAsJson(retrievedHarvestingClient)); - } - } - - return okResponse(jsonObjectBuilder().add("harvestingClients", hcArr)); - } - - @GET - @Path("{nickName}") - public Response harvestingClient(@PathParam("nickName") String nickName, @QueryParam("key") String apiKey) throws IOException { - - HarvestingClient harvestingClient = null; - try { - harvestingClient = harvestingClientService.findByNickname(nickName); - } catch (Exception ex) { - logger.warning("Exception caught looking up harvesting client " + nickName + ": " + ex.getMessage()); - return errorResponse( Response.Status.BAD_REQUEST, "Internal error: failed to look up harvesting client " + nickName + "."); - } - - if (harvestingClient == null) { - return errorResponse(Response.Status.NOT_FOUND, "Harvesting client " + nickName + " not found."); - } - - HarvestingClient retrievedHarvestingClient = null; - - try { - // findUserOrDie() and execCommand() both throw WrappedResponse - // exception, that already has a proper HTTP response in it. - - retrievedHarvestingClient = execCommand(new GetHarvestingClientCommand(createDataverseRequest(findUserOrDie()), harvestingClient)); - logger.info("retrieved Harvesting Client " + retrievedHarvestingClient.getName() + " with the GetHarvestingClient command."); - } catch (WrappedResponse wr) { - return wr.getResponse(); - } catch (Exception ex) { - logger.warning("Unknown exception caught while executing GetHarvestingClientCommand: "+ex.getMessage()); - retrievedHarvestingClient = null; - } - - if (retrievedHarvestingClient == null) { - return errorResponse( Response.Status.BAD_REQUEST, - "Internal error: failed to retrieve harvesting client " + nickName + "."); - } - - try { - return okResponse(harvestingConfigAsJson(retrievedHarvestingClient)); - } catch (Exception ex) { - logger.warning("Unknown exception caught while trying to format harvesting client config as json: "+ex.getMessage()); - return errorResponse( Response.Status.BAD_REQUEST, - "Internal error: failed to produce output for harvesting client " + nickName + "."); - } - } - - @POST - @Path("{nickName}") - public Response createHarvestingClient(String jsonBody, @PathParam("nickName") String nickName, @QueryParam("key") String apiKey) throws IOException, JsonParseException { - - try ( StringReader rdr = new StringReader(jsonBody) ) { - JsonObject json = Json.createReader(rdr).readObject(); - - HarvestingClient harvestingClient = new HarvestingClient(); - // TODO: check that it doesn't exist yet... 
- harvestingClient.setName(nickName); - String dataverseAlias = jsonParser().parseHarvestingClient(json, harvestingClient); - Dataverse ownerDataverse = dataverseService.findByAlias(dataverseAlias); - - if (ownerDataverse == null) { - return errorResponse(Response.Status.BAD_REQUEST, "No such dataverse: " + dataverseAlias); - } - - harvestingClient.setDataverse(ownerDataverse); - ownerDataverse.setHarvestingClientConfig(harvestingClient); - - DataverseRequest req = createDataverseRequest(findUserOrDie()); - HarvestingClient managedHarvestingClient = execCommand( new CreateHarvestingClientCommand(req, harvestingClient)); - return createdResponse( "/datasets/" + nickName, harvestingConfigAsJson(managedHarvestingClient)); - - } catch (JsonParseException ex) { - return errorResponse( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() ); - - } catch (WrappedResponse ex) { - return ex.getResponse(); - - } - - } - - @PUT - @Path("{nickName}") - public Response modifyHarvestingClient(String jsonBody, @PathParam("nickName") String nickName, @QueryParam("key") String apiKey) throws IOException, JsonParseException { - HarvestingClient harvestingClient = null; - try { - harvestingClient = harvestingClientService.findByNickname(nickName); - } catch (Exception ex) { - // We don't care what happened; we'll just assume we couldn't find it. - harvestingClient = null; - } - - if (harvestingClient == null) { - return errorResponse( Response.Status.NOT_FOUND, "Harvesting client " + nickName + " not found."); - } - - String ownerDataverseAlias = harvestingClient.getDataverse().getAlias(); - - try ( StringReader rdr = new StringReader(jsonBody) ) { - DataverseRequest req = createDataverseRequest(findUserOrDie()); - JsonObject json = Json.createReader(rdr).readObject(); - - String newDataverseAlias = jsonParser().parseHarvestingClient(json, harvestingClient); - - if (newDataverseAlias != null - && !newDataverseAlias.equals("") - && !newDataverseAlias.equals(ownerDataverseAlias)) { - return errorResponse(Response.Status.BAD_REQUEST, "Bad \"dataverseAlias\" supplied. 
Harvesting client "+nickName+" belongs to the dataverse "+ownerDataverseAlias); - } - HarvestingClient managedHarvestingClient = execCommand( new UpdateHarvestingClientCommand(req, harvestingClient)); - return createdResponse( "/datasets/" + nickName, harvestingConfigAsJson(managedHarvestingClient)); - - } catch (JsonParseException ex) { - return errorResponse( Response.Status.BAD_REQUEST, "Error parsing harvesting client: " + ex.getMessage() ); - - } catch (WrappedResponse ex) { - return ex.getResponse(); - - } - - } - - // TODO: - // add a @DELETE method - // (there is already a DeleteHarvestingClient command) - - // Methods for managing harvesting runs (jobs): - - - // This POST starts a new harvesting run: - @POST - @Path("{nickName}/run") - public Response startHarvestingJob(@PathParam("nickName") String clientNickname, @QueryParam("key") String apiKey) throws IOException { - - try { - AuthenticatedUser authenticatedUser = null; - - try { - authenticatedUser = findAuthenticatedUserOrDie(); - } catch (WrappedResponse wr) { - return errorResponse(Response.Status.UNAUTHORIZED, "Authentication required to use this API method"); - } - - if (authenticatedUser == null || !authenticatedUser.isSuperuser()) { - return errorResponse(Response.Status.FORBIDDEN, "Only the Dataverse Admin user can run harvesting jobs"); - } - - HarvestingClient harvestingClient = harvestingClientService.findByNickname(clientNickname); - - if (harvestingClient == null) { - return errorResponse(Response.Status.NOT_FOUND, "No such dataverse: "+clientNickname); - } - - DataverseRequest dataverseRequest = createDataverseRequest(authenticatedUser); - harvesterService.doAsyncHarvest(dataverseRequest, harvestingClient); - - } catch (Exception e) { - return this.errorResponse(Response.Status.BAD_REQUEST, "Exception thrown when running harvesting client\""+clientNickname+"\" via REST API; " + e.getMessage()); - } - return this.accepted(); - } - - // This GET shows the status of the harvesting run in progress for this - // client, if present: - // @GET - // @Path("{nickName}/run") - // TODO: - - // This DELETE kills the harvesting run in progress for this client, - // if present: - // @DELETE - // @Path("{nickName}/run") - // TODO: - - /* Auxiliary, helper methods: */ - - public static JsonArrayBuilder harvestingConfigsAsJsonArray(List harvestingDataverses) { - JsonArrayBuilder hdArr = Json.createArrayBuilder(); - - for (Dataverse hd : harvestingDataverses) { - hdArr.add(harvestingConfigAsJson(hd.getHarvestingClientConfig())); - } - return hdArr; - } - - public static JsonObjectBuilder harvestingConfigAsJson(HarvestingClient harvestingConfig) { - if (harvestingConfig == null) { - return null; - } - - - return jsonObjectBuilder().add("nickName", harvestingConfig.getName()). - add("dataverseAlias", harvestingConfig.getDataverse().getAlias()). - add("type", harvestingConfig.getHarvestType()). - add("harvestUrl", harvestingConfig.getHarvestingUrl()). - add("archiveUrl", harvestingConfig.getArchiveUrl()). - add("archiveDescription",harvestingConfig.getArchiveDescription()). - add("metadataFormat", harvestingConfig.getMetadataPrefix()). - add("set", harvestingConfig.getHarvestingSet() == null ? "N/A" : harvestingConfig.getHarvestingSet()). - add("schedule", harvestingConfig.isScheduled() ? harvestingConfig.getScheduleDescription() : "none"). - add("status", harvestingConfig.isHarvestingNow() ? "inProgress" : "inActive"). - add("lastHarvest", harvestingConfig.getLastHarvestTime() == null ? 
"N/A" : harvestingConfig.getLastHarvestTime().toString()). - add("lastResult", harvestingConfig.getLastResult()). - add("lastSuccessful", harvestingConfig.getLastSuccessfulHarvestTime() == null ? "N/A" : harvestingConfig.getLastSuccessfulHarvestTime().toString()). - add("lastNonEmpty", harvestingConfig.getLastNonEmptyHarvestTime() == null ? "N/A" : harvestingConfig.getLastNonEmptyHarvestTime().toString()). - add("lastDatasetsHarvested", harvestingConfig.getLastHarvestedDatasetCount() == null ? "N/A" : harvestingConfig.getLastHarvestedDatasetCount().toString()). - add("lastDatasetsDeleted", harvestingConfig.getLastDeletedDatasetCount() == null ? "N/A" : harvestingConfig.getLastDeletedDatasetCount().toString()). - add("lastDatasetsFailed", harvestingConfig.getLastFailedDatasetCount() == null ? "N/A" : harvestingConfig.getLastFailedDatasetCount().toString()); - } -} diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Search.java b/src/main/java/edu/harvard/iq/dataverse/api/Search.java index c65084cedb7..46f94e1208c 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Search.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Search.java @@ -27,11 +27,9 @@ import javax.json.Json; import javax.json.JsonArrayBuilder; import javax.json.JsonObjectBuilder; -import javax.servlet.http.HttpServletResponse; import javax.ws.rs.GET; import javax.ws.rs.Path; import javax.ws.rs.QueryParam; -import javax.ws.rs.core.Context; import javax.ws.rs.core.Response; import org.apache.commons.lang.StringUtils; @@ -67,8 +65,7 @@ public Response search( @QueryParam("fq") final List filterQueries, @QueryParam("show_entity_ids") boolean showEntityIds, @QueryParam("show_api_urls") boolean showApiUrls, - @QueryParam("show_my_data") boolean showMyData, - @Context HttpServletResponse response + @QueryParam("show_my_data") boolean showMyData ) { User user; @@ -185,7 +182,6 @@ public Response search( */ return errorResponse(Response.Status.BAD_REQUEST, solrQueryResponse.getError()); } - response.setHeader("Access-Control-Allow-Origin", "*"); return okResponse(value); } else { return errorResponse(Response.Status.BAD_REQUEST, "q parameter is missing"); diff --git a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java index bc23b0fa10b..629032843eb 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/imports/ImportServiceBean.java @@ -185,111 +185,6 @@ public JsonObjectBuilder handleFile(DataverseRequest dataverseRequest, Dataverse } } - public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, Dataverse owner, String metadataFormat, File metadataFile, PrintWriter cleanupLog) throws ImportException, IOException { - Dataset importedDataset = null; - - DatasetDTO dsDTO = null; - - if ("ddi".equals(metadataFormat)) { - try { - String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath())); - dsDTO = importDDIService.doImport(ImportType.HARVEST, xmlToParse); - } catch (XMLStreamException e) { - throw new ImportException("XMLStreamException" + e); - } - } // TODO: handle all supported formats; via plugins, probably - // (and if the format is already JSON - handle that too! 
- else { - throw new ImportException("Unsupported import metadata format: " + metadataFormat); - } - - // convert DTO to Json, - Gson gson = new GsonBuilder().setPrettyPrinting().create(); - String json = gson.toJson(dsDTO); - JsonReader jsonReader = Json.createReader(new StringReader(json)); - JsonObject obj = jsonReader.readObject(); - //and call parse Json to read it into a dataset - try { - JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService); - parser.setLenient(true); - Dataset ds = parser.parseDataset(obj); - - // For ImportType.NEW, if the metadata contains a global identifier, and it's not a protocol - // we support, it should be rejected. - // (TODO: ! - add some way of keeping track of supported protocols!) - //if (ds.getGlobalId() != null && !ds.getProtocol().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, ""))) { - // throw new ImportException("Could not register id " + ds.getGlobalId() + ", protocol not supported"); - //} - ds.setOwner(owner); - ds.getLatestVersion().setDatasetFields(ds.getLatestVersion().initDatasetFields()); - - // Check data against required contraints - List violations = ds.getVersions().get(0).validateRequired(); - if (!violations.isEmpty()) { - // For migration and harvest, add NA for missing required values - for (ConstraintViolation v : violations) { - DatasetField f = ((DatasetField) v.getRootBean()); - f.setSingleValue(DatasetField.NA_VALUE); - } - } - - // Check data against validation constraints - // If we are migrating and "scrub migration data" is true we attempt to fix invalid data - // if the fix fails stop processing of this file by throwing exception - Set invalidViolations = ds.getVersions().get(0).validate(); - ValidatorFactory factory = Validation.buildDefaultValidatorFactory(); - Validator validator = factory.getValidator(); - if (!invalidViolations.isEmpty()) { - for (ConstraintViolation v : invalidViolations) { - DatasetFieldValue f = ((DatasetFieldValue) v.getRootBean()); - boolean fixed = false; - boolean converted = false; - // TODO: Is this scrubbing something we want to continue doing? - if (settingsService.isTrueForKey(SettingsServiceBean.Key.ScrubMigrationData, false)) { - fixed = processMigrationValidationError(f, cleanupLog, metadataFile.getName()); - converted = true; - if (fixed) { - Set> scrubbedViolations = validator.validate(f); - if (!scrubbedViolations.isEmpty()) { - fixed = false; - } - } - } - if (!fixed) { - String msg = "Data modified - File: " + metadataFile.getName() + "; Field: " + f.getDatasetField().getDatasetFieldType().getDisplayName() + "; " - + "Invalid value: '" + f.getValue() + "'" + " Converted Value:'" + DatasetField.NA_VALUE + "'"; - cleanupLog.println(msg); - f.setValue(DatasetField.NA_VALUE); - - } - } - } - - Dataset existingDs = datasetService.findByGlobalId(ds.getGlobalId()); - - if (existingDs != null) { - // For harvested datasets, there should always only be one version. - // We will replace the current version with the imported version. 
- if (existingDs.getVersions().size() != 1) { - throw new ImportException("Error importing Harvested Dataset, existing dataset has " + existingDs.getVersions().size() + " versions"); - } - engineSvc.submit(new DestroyDatasetCommand(existingDs, dataverseRequest)); - importedDataset = engineSvc.submit(new CreateDatasetCommand(ds, dataverseRequest, false, ImportType.HARVEST)); - - } else { - importedDataset = engineSvc.submit(new CreateDatasetCommand(ds, dataverseRequest, false, ImportType.HARVEST)); - } - - } catch (JsonParseException ex) { - logger.log(Level.INFO, "Error parsing datasetVersion: {0}", ex.getMessage()); - throw new ImportException("Error parsing datasetVersion: " + ex.getMessage(), ex); - } catch (CommandException ex) { - logger.log(Level.INFO, "Error excuting Create dataset command: {0}", ex.getMessage()); - throw new ImportException("Error excuting dataverse command: " + ex.getMessage(), ex); - } - return importedDataset; - } - public JsonObjectBuilder doImport(DataverseRequest dataverseRequest, Dataverse owner, String xmlToParse, String fileName, ImportType importType, PrintWriter cleanupLog) throws ImportException, IOException { String status = ""; diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateHarvestingClientCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateHarvestingClientCommand.java deleted file mode 100644 index 6a5f0d31037..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/CreateHarvestingClientCommand.java +++ /dev/null @@ -1,37 +0,0 @@ -package edu.harvard.iq.dataverse.engine.command.impl; - -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; -import edu.harvard.iq.dataverse.authorization.Permission; -import edu.harvard.iq.dataverse.engine.command.AbstractCommand; -import edu.harvard.iq.dataverse.engine.command.CommandContext; -import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; -import edu.harvard.iq.dataverse.engine.command.exception.CommandException; - -/** - * - * @author Leonid Andreev - */ -@RequiredPermissions( Permission.EditDataverse ) -public class CreateHarvestingClientCommand extends AbstractCommand { - - private final Dataverse dv; - private final HarvestingClient harvestingClient; - - public CreateHarvestingClientCommand(DataverseRequest aRequest, HarvestingClient harvestingClient) { - super(aRequest, harvestingClient.getDataverse()); - this.harvestingClient = harvestingClient; - dv = harvestingClient.getDataverse(); - } - - @Override - public HarvestingClient execute(CommandContext ctxt) throws CommandException { - // TODO: check if the harvesting client config is legit; - // and that it is indeed new and unique? - // (may not be necessary - as the uniqueness should be enforced by - // the persistence layer... -- L.A. 
4.4) - return ctxt.em().merge(this.harvestingClient); - } - -} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteHarvestingClientCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteHarvestingClientCommand.java deleted file mode 100644 index 01a78deec4a..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/DeleteHarvestingClientCommand.java +++ /dev/null @@ -1,39 +0,0 @@ -package edu.harvard.iq.dataverse.engine.command.impl; - -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; -import edu.harvard.iq.dataverse.authorization.Permission; -import edu.harvard.iq.dataverse.engine.command.AbstractVoidCommand; -import edu.harvard.iq.dataverse.engine.command.CommandContext; -import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; -import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; - -/** - * - * @author Leonid Andreev - */ -@RequiredPermissions( Permission.EditDataverse ) -public class DeleteHarvestingClientCommand extends AbstractVoidCommand { - - private final Dataverse motherDataverse; - private final HarvestingClient harvestingClient; - - public DeleteHarvestingClientCommand(DataverseRequest aRequest, HarvestingClient harvestingClient) { - super(aRequest, harvestingClient.getDataverse()); - this.motherDataverse = harvestingClient.getDataverse(); - this.harvestingClient = harvestingClient; - } - - @Override - public void executeImpl(CommandContext ctxt) throws CommandException { - if (harvestingClient == null) { - throw new IllegalCommandException("DeleteHarvestingClientCommand: attempted to execute with null harvesting client; dataverse: "+motherDataverse.getAlias(), this); - } - motherDataverse.setHarvestingClientConfig(null); - ctxt.em().remove(harvestingClient); - ctxt.em().merge(motherDataverse); - } - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetHarvestingClientCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetHarvestingClientCommand.java deleted file mode 100644 index d3e253b59ff..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/GetHarvestingClientCommand.java +++ /dev/null @@ -1,51 +0,0 @@ -package edu.harvard.iq.dataverse.engine.command.impl; - -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.authorization.Permission; -import edu.harvard.iq.dataverse.engine.command.AbstractCommand; -import edu.harvard.iq.dataverse.engine.command.CommandContext; -import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.exception.CommandException; -import edu.harvard.iq.dataverse.engine.command.exception.IllegalCommandException; -import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; -import java.util.Collections; -import java.util.Map; -import java.util.Set; - -/** - * - * @author Leonid Andreev - */ -// One can view the configuration of a Harvesting Client if and only if -// they have the permission to view the dataverse that owns the harvesting -// client. 
And for a Dataverse, we cannot define the permission with a -// @RequiredPermission annotation - because the decision has to be made dynamically: -// Everybody can view a published Dataverse; otherwise, an explicit -// ViewUnpublishedDataverse is needed. -// This is defined in the getRequiredPermissions() method, below. -public class GetHarvestingClientCommand extends AbstractCommand{ - private final Dataverse ownerDataverse; - - public GetHarvestingClientCommand(DataverseRequest aRequest, HarvestingClient harvestingClient) { - super(aRequest, harvestingClient.getDataverse()); - this.ownerDataverse = harvestingClient.getDataverse(); - } - - @Override - public HarvestingClient execute(CommandContext ctxt) throws CommandException { - if (ownerDataverse == null) { - throw new IllegalCommandException("GetHarvestingClientCommand called on a null dataverse object", this); - } - if (ownerDataverse.getHarvestingClientConfig() == null) { - throw new IllegalCommandException("No harvesting client is configured for dataverse "+ownerDataverse.getAlias(), this); - } - return ownerDataverse.getHarvestingClientConfig(); - } - - @Override - public Map> getRequiredPermissions() { - return Collections.singletonMap("", - ownerDataverse.isReleased() ? Collections.emptySet() - : Collections.singleton(Permission.ViewUnpublishedDataverse)); - } -} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestingClientCommand.java b/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestingClientCommand.java deleted file mode 100644 index 0e699de40e3..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/engine/command/impl/UpdateHarvestingClientCommand.java +++ /dev/null @@ -1,36 +0,0 @@ -package edu.harvard.iq.dataverse.engine.command.impl; - -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; -import edu.harvard.iq.dataverse.authorization.Permission; -import edu.harvard.iq.dataverse.engine.command.AbstractCommand; -import edu.harvard.iq.dataverse.engine.command.CommandContext; -import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.engine.command.RequiredPermissions; -import edu.harvard.iq.dataverse.engine.command.exception.CommandException; - -/** - * - * @author Leonid Andreev - */ -@RequiredPermissions( Permission.EditDataverse ) -public class UpdateHarvestingClientCommand extends AbstractCommand { - - private final Dataverse dv; - private final HarvestingClient harvestingClient; - - public UpdateHarvestingClientCommand(DataverseRequest aRequest, HarvestingClient harvestingClient) { - super(aRequest, harvestingClient.getDataverse()); - this.harvestingClient = harvestingClient; - dv = harvestingClient.getDataverse(); - } - - @Override - public HarvestingClient execute(CommandContext ctxt) throws CommandException { - // TODO: check that the harvesting client config is attached to a legit - // dataverse; and that we are in fact modifying a config that already - // exists. -- L.A. 
4.4 - return ctxt.em().merge(this.harvestingClient); - } - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/ClientHarvestRun.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/ClientHarvestRun.java deleted file mode 100644 index 7cd8c4e603d..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/ClientHarvestRun.java +++ /dev/null @@ -1,186 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. - */ -package edu.harvard.iq.dataverse.harvest.client; - -import java.io.Serializable; -import java.util.Date; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.JoinColumn; -import javax.persistence.ManyToOne; -import javax.persistence.Temporal; -import javax.persistence.TemporalType; - -/** - * - * @author Leonid Andreev - * - * This is a record of an attempted harvesting client run. (Should it be named - * HarvestingClientRunResult instead?) - */ -@Entity -public class ClientHarvestRun implements Serializable { - - private static final long serialVersionUID = 1L; - - @Id - @GeneratedValue(strategy = GenerationType.AUTO) - private Long id; - - public Long getId() { - return id; - } - - public void setId(Long id) { - this.id = id; - } - - public enum RunResultType { SUCCESS, FAILURE, INPROGRESS }; - - private static String RESULT_LABEL_SUCCESS = "SUCCESS"; - private static String RESULT_LABEL_FAILURE = "FAILED"; - private static String RESULT_LABEL_INPROGRESS = "INPROGRESS"; - - @ManyToOne - @JoinColumn(nullable = false) - private HarvestingClient harvestingClient; - - public HarvestingClient getHarvestingClient() { - return harvestingClient; - } - - public void setHarvestingClient(HarvestingClient harvestingClient) { - this.harvestingClient = harvestingClient; - } - - private RunResultType harvestResult; - - public RunResultType getResult() { - return harvestResult; - } - - public String getResultLabel() { - if (isSuccess()) { - return RESULT_LABEL_SUCCESS; - } else if (isFailed()) { - return RESULT_LABEL_FAILURE; - } else if (isInProgress()) { - return RESULT_LABEL_INPROGRESS; - } - return null; - } - - public void setResult(RunResultType harvestResult) { - this.harvestResult = harvestResult; - } - - public boolean isSuccess() { - return RunResultType.SUCCESS == harvestResult; - } - - public void setSuccess() { - harvestResult = RunResultType.SUCCESS; - } - - public boolean isFailed() { - return RunResultType.FAILURE == harvestResult; - } - - public void setFailed() { - harvestResult = RunResultType.FAILURE; - } - - public boolean isInProgress() { - return RunResultType.INPROGRESS == harvestResult || - (harvestResult == null && startTime != null && finishTime == null); - } - - public void setInProgress() { - harvestResult = RunResultType.INPROGRESS; - } - - // Time of this harvest attempt: - @Temporal(value = TemporalType.TIMESTAMP) - private Date startTime; - - public Date getStartTime() { - return startTime; - } - - public void setStartTime(Date startTime) { - this.startTime = startTime; - } - - @Temporal(value = TemporalType.TIMESTAMP) - private Date finishTime; - - public Date getFinishTime() { - return finishTime; - } - - public void setFinishTime(Date finishTime) { - this.finishTime = finishTime; - } - - // Tese are the Dataset counts from that last harvest: - // (TODO: do we need to 
differentiate between *created* (new), and *updated* - // harvested datasets? -- L.A. 4.4 - private Long harvestedDatasetCount = 0L; - private Long failedDatasetCount = 0L; - private Long deletedDatasetCount = 0L; - - public Long getHarvestedDatasetCount() { - return harvestedDatasetCount; - } - - public void setHarvestedDatasetCount(Long harvestedDatasetCount) { - this.harvestedDatasetCount = harvestedDatasetCount; - } - - public Long getFailedDatasetCount() { - return failedDatasetCount; - } - - public void setFailedDatasetCount(Long failedDatasetCount) { - this.failedDatasetCount = failedDatasetCount; - } - - public Long getDeletedDatasetCount() { - return deletedDatasetCount; - } - - public void setDeletedDatasetCount(Long deletedDatasetCount) { - this.deletedDatasetCount = deletedDatasetCount; - } - - @Override - public int hashCode() { - int hash = 0; - hash += (id != null ? id.hashCode() : 0); - return hash; - } - - @Override - public boolean equals(Object object) { - // TODO: Warning - this method won't work in the case the id fields are not set - if (!(object instanceof ClientHarvestRun)) { - return false; - } - ClientHarvestRun other = (ClientHarvestRun) object; - if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) { - return false; - } - return true; - } - - @Override - public String toString() { - return "edu.harvard.iq.dataverse.harvest.client.HarvestingClientRun[ id=" + id + " ]"; - } - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java deleted file mode 100644 index f87e182ecf8..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/FastGetRecord.java +++ /dev/null @@ -1,520 +0,0 @@ -/* - Copyright (C) 2005-2012, by the President and Fellows of Harvard College. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - Dataverse Network - A web application to share, preserve and analyze research data. - Developed at the Institute for Quantitative Social Science, Harvard University. - Version 3.0. -*/ -package edu.harvard.iq.dataverse.harvest.client; - -import java.io.IOException; -import java.io.FileNotFoundException; - -import java.io.InputStream; -import java.io.StringReader; -import java.io.InputStreamReader; -import java.io.BufferedReader; -import java.io.File; - -import java.io.FileOutputStream; - -import java.io.PrintWriter; -import java.net.HttpURLConnection; -import java.net.URL; - -import java.util.zip.GZIPInputStream; -import java.util.zip.InflaterInputStream; -import java.util.zip.ZipInputStream; - - -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.TransformerException; -import org.xml.sax.SAXException; - -//import org.xml.sax.InputSource; - -import javax.xml.stream.XMLStreamConstants; -import javax.xml.stream.XMLStreamException; -import javax.xml.stream.XMLStreamReader; -import javax.xml.stream.XMLInputFactory; - -/* - * This is an optimized implementation of OAIPMH GetRecord method. 
- * Some code is borrowed from the OCLC implementation. - * It handles the retrieval of the record in a drastically different manner: - * It parses and validates the top, "administrative" portion of the record using - * an event-driven parser. Once it reaches the "payload", the actual metadata - * record enclosed in ... tags, it just reads it line by - * line without parsing and saves it in a temp file. (The record will be parsed - * and validated in the next step, when we attempt to import it). - * On a very large record, for example, a DDI of a Dataset with a large number - * of associated data variables, even event-driven XML parsing can end up - * being rather expensive. - * This optimized version was originally written for DVN 3.*. - * Added in Dataverse 4: custom protocol extension for sending the metadata - * record as a pre-declared numbe of bytes. - * @author Leonid Andreev - * -*/ - -public class FastGetRecord { - - /** - * Client-side GetRecord verb constructor - * - * @param baseURL the baseURL of the server to be queried - * @exception MalformedURLException the baseURL is bad - * @exception SAXException the xml response is bad - * @exception IOException an I/O error occurred - */ - - public FastGetRecord(String baseURL, String identifier, String metadataPrefix) - throws IOException, ParserConfigurationException, SAXException, - TransformerException { - harvestRecord (baseURL, identifier, metadataPrefix); - - } - - private String errorMessage = null; - private File savedMetadataFile = null; - private XMLInputFactory xmlInputFactory = null; - private boolean recordDeleted = false; - - // TODO: logging - - public String getErrorMessage () { - return errorMessage; - } - - public File getMetadataFile () { - return savedMetadataFile; - } - - public boolean isDeleted () { - return this.recordDeleted; - } - - - public void harvestRecord(String baseURL, String identifier, String metadataPrefix) throws IOException, - ParserConfigurationException, SAXException, TransformerException { - - xmlInputFactory = javax.xml.stream.XMLInputFactory.newInstance(); - - String requestURL = getRequestURL(baseURL, identifier, metadataPrefix); - - InputStream in = null; - URL url = new URL(requestURL); - HttpURLConnection con = null; - int responseCode = 0; - - con = (HttpURLConnection) url.openConnection(); - con.setRequestProperty("User-Agent", "OAIHarvester/2.0"); - con.setRequestProperty("Accept-Encoding", - "compress, gzip, identify"); - try { - responseCode = con.getResponseCode(); - //logger.debug("responseCode=" + responseCode); - } catch (FileNotFoundException e) { - //logger.info(requestURL, e); - responseCode = HttpURLConnection.HTTP_UNAVAILABLE; - } - - // TODO: -- L.A. - // - // support for cookies; - // support for limited retry attempts -- ? - // implement reading of the stream as filterinputstream -- ? - // -- that could make it a little faster still. -- L.A. 
- - - - if (responseCode == 200) { - - String contentEncoding = con.getHeaderField("Content-Encoding"); - //logger.debug("contentEncoding=" + contentEncoding); - - // support for the standard compress/gzip/deflate compression - // schemes: - - if ("compress".equals(contentEncoding)) { - ZipInputStream zis = new ZipInputStream(con.getInputStream()); - zis.getNextEntry(); - in = zis; - } else if ("gzip".equals(contentEncoding)) { - in = new GZIPInputStream(con.getInputStream()); - } else if ("deflate".equals(contentEncoding)) { - in = new InflaterInputStream(con.getInputStream()); - } else { - in = con.getInputStream(); - } - - // We are going to read the OAI header and SAX-parse it for the - // error messages and other protocol information; - // The metadata section we're going to simply save in a temporary - // file, unparsed. - - BufferedReader rd = new BufferedReader(new InputStreamReader(in)); - - String line = null; - String oaiResponseHeader = ""; - boolean metadataFlag = false; - boolean metadataWritten = false; - boolean schemaChecked = false; - - savedMetadataFile = File.createTempFile("meta", ".tmp"); - FileOutputStream tempFileStream = new FileOutputStream(savedMetadataFile); - PrintWriter metadataOut = new PrintWriter (tempFileStream, true); - - metadataOut.println(""); - - int mopen = 0; - int mclose = 0; - - while ( ( line = rd.readLine () ) != null) { - if (!metadataFlag) { - if (line.matches(".*.*")) { - String lineCopy = line; - - int i = line.indexOf(""); - line = line.substring(i+10); - - oaiResponseHeader = oaiResponseHeader.concat(lineCopy.replaceAll(".*", "")); - - metadataFlag = true; - } - } - - if (metadataFlag) { - if (!metadataWritten) { - // Inside an OAI-PMH GetRecord response, the metadata - // record returned is enclosed in ... - // tags, after the OAI service sections that provide the - // date, identifier and other protocol-level information. - // However, it is possible for the metadata record itself - // to have tags of its own. So we have no - // choice but to count the opening and closing tags in - // order to recognize the one terminating the metadata - // section. - // This code isn't pretty, but on seriously large records - // the savings from not fully parsing the XML are - // significant. - // -- L.A. - - if (line.matches(" -1) { - if (!line.substring(i).matches("^]*/")) { - // don't count if it's a closed, empty tag: - // - mopen++; - } - i+=10; - } - } - if (line.matches(".*.*")) { - int i = 0; - while ((i = line.indexOf("", i)) > -1) { - i+=11; - mclose++; - } - - if ( mclose > mopen ) { - line = line.substring(0, line.lastIndexOf("")); - metadataWritten = true; - } - } - - if (!schemaChecked) { - // if the top-level XML element lacks the schema definition, - // insert the generic xmlns and xmlns:xsi attributes; these - // may be needed by the transform stylesheets. - // this mimicks the behaviour of the OCLC GetRecord - // client implementation. - // -L.A. - - int offset = 0; - - // However, there may be one or more XML comments before - // the first "real" XML element (of the form - // ). So we need to skip these! - - while ( (line.indexOf('<', offset) > -1) - && - "': - - while (line != null - && - ((offset = line.indexOf("-->",offset)) < 0)) { - line = line.replaceAll("[\n\r]", " "); - offset = line.length(); - line = line.concat(rd.readLine()); - } - - offset += 3; - } - - // if we have skipped some comments, is there another - // XML element left in the buffered line? 
- int firstElementStart = -1; - - if ((firstElementStart = line.indexOf('<', offset)) > -1 ) { - // OK, looks like there is. - // is it terminated? - // if not, let's read the stream until - // we find the closing '>': - - int firstElementEnd = -1; - offset = firstElementStart; - - while (line != null - && - ((firstElementEnd = line.indexOf('>',offset)) < 0)) { - - line = line.replaceAll("[\n\r]", ""); - offset = line.length(); - line = line.concat(rd.readLine()); - } - - if (firstElementEnd < 0) { - // this should not happen! - // we've reached the end of the XML stream - // without encountering a single valid XML tag -- ?? - - this.errorMessage = "Malformed GetRecord response; reached the end of the stream but couldn't find a single valid XML element in the metadata section."; - } else { - - // OK, we now have a line that contains a complete, - // terminated (possibly multi-line) first XML element - // that starts at [offset]. - - int i = firstElementStart; - - if (!line.substring(i).matches("^<[^>]*xmlns.*")) { - String head = line.substring(0, i); - String tail = line.substring(i); - tail = tail.replaceFirst(">", " xmlns=\"http://www.openarchives.org/OAI/2.0/\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\">"); - line = head + tail; - } - - schemaChecked = true; - } - } else { - // there was no "real" XML elements, only comments. - // We'll perform this schema check in the next - // iteration. - } - } - - metadataOut.println(line); - } - } else { - oaiResponseHeader = oaiResponseHeader.concat(line); - } - } - - // parse the OAI Record header: - - XMLStreamReader xmlr = null; - - try { - StringReader reader = new StringReader(oaiResponseHeader); - xmlr = xmlInputFactory.createXMLStreamReader(reader); - processOAIheader(xmlr); - - } catch (XMLStreamException ex) { - //Logger.getLogger("global").log(Level.SEVERE, null, ex); - if (this.errorMessage == null) { - this.errorMessage = "Malformed GetRecord response: " + oaiResponseHeader; - } - - // delete the temp metadata file; we won't need it: - if (savedMetadataFile != null) { - //savedMetadataFile.delete(); - } - - } - - try { - if (xmlr != null) { - xmlr.close(); - } - } catch (Exception ed) { - // seems OK to ignore; - } - - - if (rd != null) { - rd.close(); - } - - if (metadataOut != null) { - metadataOut.close(); - } - - if (!(metadataWritten) && !(this.isDeleted())) { - this.errorMessage = "Failed to parse GetRecord response; "+oaiResponseHeader; - //savedMetadataFile.delete(); - } - - if (this.isDeleted()) { - //savedMetadataFile.delete(); - } - - - } else { - this.errorMessage = "GetRecord request failed. HTTP error code "+responseCode; - } - } - - /** - * Construct the query portion of the http request - * (borrowed from OCLC implementation) - * - * @return a String containing the query portion of the http request - */ - private static String getRequestURL(String baseURL, - String identifier, - String metadataPrefix) { - - StringBuffer requestURL = new StringBuffer(baseURL); - requestURL.append("?verb=GetRecord"); - requestURL.append("&identifier=").append(identifier); - requestURL.append("&metadataPrefix=").append(metadataPrefix); - - return requestURL.toString(); - } - - private void processOAIheader (XMLStreamReader xmlr) throws XMLStreamException { - - // is this really a GetRecord response? 
- xmlr.nextTag(); - xmlr.require(XMLStreamConstants.START_ELEMENT, null, "OAI-PMH"); - processOAIPMH(xmlr); - - } - - private void processOAIPMH (XMLStreamReader xmlr) throws XMLStreamException { - - for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { - if (event == XMLStreamConstants.START_ELEMENT) { - // TODO: - // process all the fields currently skipped -- ? -- L.A. - if (xmlr.getLocalName().equals("responseDate")) {} - else if (xmlr.getLocalName().equals("request")) {} - else if (xmlr.getLocalName().equals("error")) { - String errorCode = xmlr.getAttributeValue(null, "code"); - String errorMessageText = getElementText(xmlr); - - if (errorCode != null) { - this.errorMessage = "GetRecord error code: "+errorCode+"; "; - } - - if (errorCode != null) { - this.errorMessage = this.errorMessage + "GetRecord error message: "+errorMessageText+"; "; - } - throw new XMLStreamException(this.errorMessage); - - } - else if (xmlr.getLocalName().equals("GetRecord")) processGetRecordSection(xmlr); - } else if (event == XMLStreamConstants.END_ELEMENT) { - if (xmlr.getLocalName().equals("OAI-PMH")) return; - } - } - } - - private void processGetRecordSection (XMLStreamReader xmlr) throws XMLStreamException { - for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { - if (event == XMLStreamConstants.START_ELEMENT) { - if (xmlr.getLocalName().equals("record")) {processRecord(xmlr);} - } else if (event == XMLStreamConstants.END_ELEMENT) { - if (xmlr.getLocalName().equals("GetRecord")) return; - } - } - - } - - private void processRecord (XMLStreamReader xmlr) throws XMLStreamException { - for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { - if (event == XMLStreamConstants.START_ELEMENT) { - if (xmlr.getLocalName().equals("header")) { - if ("deleted".equals( xmlr.getAttributeValue(null, "status"))) { - this.recordDeleted = true; - } - processHeader(xmlr); - } else if (xmlr.getLocalName().equals("metadata")) {/*do nothing;*/} - } else if (event == XMLStreamConstants.END_ELEMENT) { - if (xmlr.getLocalName().equals("record")) return; - } - } - } - - private void processHeader (XMLStreamReader xmlr) throws XMLStreamException { - for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) { - if (event == XMLStreamConstants.START_ELEMENT) { - if (xmlr.getLocalName().equals("identifier")) {/*do nothing*/} - else if (xmlr.getLocalName().equals("datestamp")) {/*do nothing -- ?*/} - else if (xmlr.getLocalName().equals("setSpec")) {/*do nothing*/} - - - } else if (event == XMLStreamConstants.END_ELEMENT) { - if (xmlr.getLocalName().equals("header")) return; - } - } - } - - - // (from Gustavo's ddiServiceBean -- L.A.) 
- // - /* We had to add this method because the ref getElementText has a bug where it - * would append a null before the text, if there was an escaped apostrophe; it appears - * that the code finds an null ENTITY_REFERENCE in this case which seems like a bug; - * the workaround for the moment is to comment or handling ENTITY_REFERENCE in this case - */ - private String getElementText(XMLStreamReader xmlr) throws XMLStreamException { - if(xmlr.getEventType() != XMLStreamConstants.START_ELEMENT) { - throw new XMLStreamException("parser must be on START_ELEMENT to read next text", xmlr.getLocation()); - } - int eventType = xmlr.next(); - StringBuffer content = new StringBuffer(); - while(eventType != XMLStreamConstants.END_ELEMENT ) { - if(eventType == XMLStreamConstants.CHARACTERS - || eventType == XMLStreamConstants.CDATA - || eventType == XMLStreamConstants.SPACE - /* || eventType == XMLStreamConstants.ENTITY_REFERENCE*/) { - content.append(xmlr.getText()); - } else if(eventType == XMLStreamConstants.PROCESSING_INSTRUCTION - || eventType == XMLStreamConstants.COMMENT - || eventType == XMLStreamConstants.ENTITY_REFERENCE) { - // skipping - } else if(eventType == XMLStreamConstants.END_DOCUMENT) { - throw new XMLStreamException("unexpected end of document when reading element text content"); - } else if(eventType == XMLStreamConstants.START_ELEMENT) { - throw new XMLStreamException("element text content may not contain START_ELEMENT", xmlr.getLocation()); - } else { - throw new XMLStreamException("Unexpected event type "+eventType, xmlr.getLocation()); - } - eventType = xmlr.next(); - } - return content.toString(); - } - - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestTimerInfo.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestTimerInfo.java deleted file mode 100644 index b9db5f22d02..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestTimerInfo.java +++ /dev/null @@ -1,97 +0,0 @@ -/* - Copyright (C) 2005-2012, by the President and Fellows of Harvard College. - - Licensed under the Apache License, Version 2.0 (the "License"); - you may not use this file except in compliance with the License. - You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. - - Dataverse Network - A web application to share, preserve and analyze research data. - Developed at the Institute for Quantitative Social Science, Harvard University. - Version 3.0. -*/ -package edu.harvard.iq.dataverse.harvest.client; - -import java.io.Serializable; - -/** - * This class is used when creating an EJB Timer for scheduling Harvesting. - * We use this class rather than the HarvestingClient entity because - * the class must be Serializable, and there is too much info associated with the HarvestingClient - * in order to realistically serialize it. (We can't make related mapped entities transient.) 
- * - * Based on the DVN 3 implementation, - * original - * @author Ellen Kraffmiller - * incorporated into Dataverse 4 by - * @author Leonid Andreev - */ -public class HarvestTimerInfo implements Serializable { - private Long harvestingClientId; - private String name; - private String schedulePeriod; - private Integer scheduleHourOfDay; - - public HarvestTimerInfo() { - - } - - - public HarvestTimerInfo(Long harvestingClientId, String name, String schedulePeriod, Integer scheduleHourOfDay, Integer scheduleDayOfWeek) { - this.harvestingClientId=harvestingClientId; - this.name=name; - this.schedulePeriod=schedulePeriod; - this.scheduleDayOfWeek=scheduleDayOfWeek; - this.scheduleHourOfDay=scheduleHourOfDay; - } - - - public Long getHarvestingClientId() { - return harvestingClientId; - } - - public void setHarvestingClientId(Long harvestingClientId) { - this.harvestingClientId = harvestingClientId; - } - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - public String getSchedulePeriod() { - return schedulePeriod; - } - - public void setSchedulePeriod(String schedulePeriod) { - this.schedulePeriod = schedulePeriod; - } - - public Integer getScheduleHourOfDay() { - return scheduleHourOfDay; - } - - public void setScheduleHourOfDay(Integer scheduleHourOfDay) { - this.scheduleHourOfDay = scheduleHourOfDay; - } - - public Integer getScheduleDayOfWeek() { - return scheduleDayOfWeek; - } - - public void setScheduleDayOfWeek(Integer scheduleDayOfWeek) { - this.scheduleDayOfWeek = scheduleDayOfWeek; - } - private Integer scheduleDayOfWeek; - - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java deleted file mode 100644 index 939a9e1e582..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvesterServiceBean.java +++ /dev/null @@ -1,431 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. 
- */ -package edu.harvard.iq.dataverse.harvest.client; - -import edu.harvard.iq.dataverse.Dataset; -import edu.harvard.iq.dataverse.DatasetServiceBean; -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.timer.DataverseTimerServiceBean; -import edu.harvard.iq.dataverse.util.FileUtil; -import java.io.File; -import java.io.IOException; -import java.text.SimpleDateFormat; -import java.util.ArrayList; -import java.util.Calendar; -import java.util.Date; -import java.util.Iterator; -import java.util.List; -import java.net.URLEncoder; -import java.util.logging.FileHandler; -import java.util.logging.Level; -import java.util.logging.Logger; -import javax.annotation.Resource; -import javax.ejb.Asynchronous; -import javax.ejb.EJB; -import javax.ejb.EJBException; -import javax.ejb.Stateless; -import javax.ejb.Timer; -import javax.ejb.TransactionAttribute; -import javax.ejb.TransactionAttributeType; -import javax.faces.bean.ManagedBean; -import javax.inject.Named; -import javax.xml.bind.Unmarshaller; -import javax.xml.parsers.ParserConfigurationException; -import javax.xml.transform.TransformerException; -import org.apache.commons.lang.mutable.MutableBoolean; -import org.xml.sax.SAXException; - -import com.lyncode.xoai.model.oaipmh.Granularity; -import com.lyncode.xoai.model.oaipmh.Header; -import com.lyncode.xoai.serviceprovider.ServiceProvider; -import com.lyncode.xoai.serviceprovider.model.Context; -import com.lyncode.xoai.serviceprovider.client.HttpOAIClient; -import com.lyncode.xoai.serviceprovider.exceptions.BadArgumentException; -import com.lyncode.xoai.serviceprovider.parameters.ListIdentifiersParameters; -import edu.harvard.iq.dataverse.api.imports.ImportServiceBean; -import edu.harvard.iq.dataverse.engine.command.DataverseRequest; - -/** - * - * @author Leonid Andreev - */ -@Stateless(name = "harvesterService") -@Named -@ManagedBean -public class HarvesterServiceBean { - @EJB - DataverseServiceBean dataverseService; - @EJB - DatasetServiceBean datasetService; - @Resource - javax.ejb.TimerService timerService; - @EJB - DataverseTimerServiceBean dataverseTimerService; - @EJB - HarvestingClientServiceBean harvestingClientService; - @EJB - ImportServiceBean importService; - - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean"); - private static final SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd"); - private static final SimpleDateFormat logFormatter = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss"); - - public static final String HARVEST_RESULT_SUCCESS="success"; - public static final String HARVEST_RESULT_FAILED="failed"; - private static final Long INDEXING_CONTENT_BATCH_SIZE = 10000000L; - - - public HarvesterServiceBean() { - - } - - /** - * Called to run an "On Demand" harvest. 
- */ - @Asynchronous - public void doAsyncHarvest(DataverseRequest dataverseRequest, HarvestingClient harvestingClient) { - - try { - doHarvest(dataverseRequest, harvestingClient.getId()); - } catch (Exception e) { - logger.info("Caught exception running an asynchronous harvest (dataverse \""+harvestingClient.getName()+"\")"); - } - } - - public void createScheduledHarvestTimers() { - logger.log(Level.INFO, "HarvesterService: going to (re)create Scheduled harvest timers."); - dataverseTimerService.removeHarvestTimers(); - - List configuredClients = harvestingClientService.getAllHarvestingClients(); - for (Iterator it = configuredClients.iterator(); it.hasNext();) { - HarvestingClient harvestingConfig = (HarvestingClient) it.next(); - if (harvestingConfig.isScheduled()) { - dataverseTimerService.createHarvestTimer(harvestingConfig); - } - } - } - - public List getHarvestTimers() { - ArrayList timers = new ArrayList<>(); - - for (Iterator it = timerService.getTimers().iterator(); it.hasNext();) { - Timer timer = (Timer) it.next(); - if (timer.getInfo() instanceof HarvestTimerInfo) { - HarvestTimerInfo info = (HarvestTimerInfo) timer.getInfo(); - timers.add(info); - } - } - return timers; - } - - /* - This method is implemented in the DataverseTimerServiceBean; - TODO: make sure that implementation does everything we need. - -- L.A. 4.4, May 08 2016. - private void createHarvestTimer(Dataverse harvestingDataverse) { - HarvestingClient harvestingDataverseConfig = harvestingDataverse.getHarvestingClientConfig(); - - if (harvestingDataverseConfig == null) { - logger.info("ERROR: No Harvesting Configuration found for dataverse id="+harvestingDataverse.getId()); - return; - } - - if (harvestingDataverseConfig.isScheduled()) { - long intervalDuration = 0; - Calendar initExpiration = Calendar.getInstance(); - initExpiration.set(Calendar.MINUTE, 0); - initExpiration.set(Calendar.SECOND, 0); - if (harvestingDataverseConfig.getSchedulePeriod().equals(harvestingDataverseConfig.SCHEDULE_PERIOD_DAILY)) { - intervalDuration = 1000 * 60 * 60 * 24; - initExpiration.set(Calendar.HOUR_OF_DAY, harvestingDataverseConfig.getScheduleHourOfDay()); - - } else if (harvestingDataverseConfig.getSchedulePeriod().equals(harvestingDataverseConfig.SCHEDULE_PERIOD_WEEKLY)) { - intervalDuration = 1000 * 60 * 60 * 24 * 7; - initExpiration.set(Calendar.HOUR_OF_DAY, harvestingDataverseConfig.getScheduleHourOfDay()); - initExpiration.set(Calendar.DAY_OF_WEEK, harvestingDataverseConfig.getScheduleDayOfWeek()); - - } else { - logger.log(Level.WARNING, "Could not set timer for dataverse id, " + harvestingDataverse.getId() + ", unknown schedule period: " + harvestingDataverseConfig.getSchedulePeriod()); - return; - } - Date initExpirationDate = initExpiration.getTime(); - Date currTime = new Date(); - if (initExpirationDate.before(currTime)) { - initExpirationDate.setTime(initExpiration.getTimeInMillis() + intervalDuration); - } - logger.log(Level.INFO, "Setting timer for dataverse " + harvestingDataverse.getName() + ", initial expiration: " + initExpirationDate); - dataverseTimerService.createTimer(initExpirationDate, intervalDuration, new HarvestTimerInfo(harvestingDataverse.getId(), harvestingDataverse.getName(), harvestingDataverseConfig.getSchedulePeriod(), harvestingDataverseConfig.getScheduleHourOfDay(), harvestingDataverseConfig.getScheduleDayOfWeek())); - } - } - */ - - /** - * Run a harvest for an individual harvesting Dataverse - * @param dataverseId - */ - public void doHarvest(DataverseRequest dataverseRequest, Long 
-        HarvestingClient harvestingClientConfig = harvestingClientService.find(harvestingClientId);
-
-        if (harvestingClientConfig == null) {
-            throw new IOException("No such harvesting client: id=" + harvestingClientId);
-        }
-
-        Dataverse harvestingDataverse = harvestingClientConfig.getDataverse();
-
-        MutableBoolean harvestErrorOccurred = new MutableBoolean(false);
-        String logTimestamp = logFormatter.format(new Date());
-        Logger hdLogger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean." + harvestingDataverse.getAlias() + logTimestamp);
-        String logFileName = "../logs" + File.separator + "harvest_" + harvestingClientConfig.getName() + logTimestamp + ".log";
-        FileHandler fileHandler = new FileHandler(logFileName);
-        hdLogger.addHandler(fileHandler);
-        List<Long> harvestedDatasetIds = null;
-
-        List<Long> harvestedDatasetIdsThisBatch = new ArrayList<>();
-
-        List<String> failedIdentifiers = new ArrayList<>();
-        Date harvestStartTime = new Date();
-
-        try {
-            boolean harvestingNow = harvestingClientConfig.isHarvestingNow();
-
-            if (harvestingNow) {
-                harvestErrorOccurred.setValue(true);
-                hdLogger.log(Level.SEVERE, "Cannot begin harvesting, Dataverse " + harvestingDataverse.getName() + " is currently being harvested.");
-
-            } else {
-                harvestingClientService.resetHarvestInProgress(harvestingDataverse.getId());
-                harvestingClientService.setHarvestInProgress(harvestingDataverse.getId(), harvestStartTime);
-
-
-                if (harvestingClientConfig.isOai()) {
-                    harvestedDatasetIds = harvestOAI(dataverseRequest, harvestingClientConfig, hdLogger, harvestErrorOccurred, failedIdentifiers, harvestedDatasetIdsThisBatch);
-
-                } else {
-                    throw new IOException("Unsupported harvest type");
-                }
-                harvestingClientService.setHarvestSuccess(harvestingDataverse.getId(), new Date(), harvestedDatasetIds.size(), failedIdentifiers.size());
-                hdLogger.log(Level.INFO, "COMPLETED HARVEST, server=" + harvestingClientConfig.getArchiveUrl() + ", metadataPrefix=" + harvestingClientConfig.getMetadataPrefix());
-                hdLogger.log(Level.INFO, "Datasets created/updated: " + harvestedDatasetIds.size() + ", datasets deleted: [TODO:], datasets failed: " + failedIdentifiers.size());
-
-                // now index all the datasets we have harvested - created, modified or deleted:
-                /* (TODO: !!!)
- if (this.processedSizeThisBatch > 0) { - hdLogger.log(Level.INFO, "POST HARVEST, reindexing the remaining studies."); - if (this.harvestedDatasetIdsThisBatch != null) { - hdLogger.log(Level.INFO, this.harvestedDatasetIdsThisBatch.size()+" studies in the batch"); - } - hdLogger.log(Level.INFO, this.processedSizeThisBatch + " bytes of content"); - indexService.updateIndexList(this.harvestedDatasetIdsThisBatch); - hdLogger.log(Level.INFO, "POST HARVEST, calls to index finished."); - } else { - hdLogger.log(Level.INFO, "(All harvested content already reindexed)"); - } - */ - } - //mailService.sendHarvestNotification(...getSystemEmail(), harvestingDataverse.getName(), logFileName, logTimestamp, harvestErrorOccurred.booleanValue(), harvestedDatasetIds.size(), failedIdentifiers); - } catch (Throwable e) { - harvestErrorOccurred.setValue(true); - String message = "Exception processing harvest, server= " + harvestingClientConfig.getArchiveUrl() + ",format=" + harvestingClientConfig.getMetadataPrefix() + " " + e.getClass().getName() + " " + e.getMessage(); - hdLogger.log(Level.SEVERE, message); - logException(e, hdLogger); - hdLogger.log(Level.INFO, "HARVEST NOT COMPLETED DUE TO UNEXPECTED ERROR."); - // TODO: - // even though this harvesting run failed, we may have had successfully - // processed some number of datasets, by the time the exception was thrown. - // We should record that number too. And the number of the datasets that - // had failed, that we may have counted. -- L.A. 4.4 - harvestingClientService.setHarvestFailure(harvestingDataverse.getId(), new Date()); - - } finally { - harvestingClientService.resetHarvestInProgress(harvestingDataverse.getId()); - fileHandler.close(); - hdLogger.removeHandler(fileHandler); - } - } - - /** - * - * @param harvestingClient the harvesting client object - * @param hdLogger custom logger (specific to this harvesting run) - * @param harvestErrorOccurred have we encountered any errors during harvest? - * @param failedIdentifiers Study Identifiers for failed "GetRecord" requests - */ - private List harvestOAI(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, Logger hdLogger, MutableBoolean harvestErrorOccurred, List failedIdentifiers, List harvestedDatasetIdsThisBatch) - throws IOException, ParserConfigurationException, SAXException, TransformerException { - - List harvestedDatasetIds = new ArrayList(); - Long processedSizeThisBatch = 0L; - - - String baseOaiUrl = harvestingClient.getHarvestingUrl(); - String metadataPrefix = harvestingClient.getMetadataPrefix(); - Date fromDate = harvestingClient.getLastNonEmptyHarvestTime(); - - String set = harvestingClient.getHarvestingSet() == null ? null : URLEncoder.encode(harvestingClient.getHarvestingSet(), "UTF-8"); - - hdLogger.log(Level.INFO, "BEGIN HARVEST..., oaiUrl=" + baseOaiUrl + ",set=" + set + ", metadataPrefix=" + metadataPrefix + ", from=" + fromDate); - - ListIdentifiersParameters parameters = buildParams(metadataPrefix, set, fromDate); - ServiceProvider serviceProvider = getServiceProvider(baseOaiUrl, Granularity.Second); - - try { - for (Iterator
idIter = serviceProvider.listIdentifiers(parameters); idIter.hasNext();) { - - Header h = idIter.next(); - String identifier = h.getIdentifier(); - hdLogger.fine("identifier: " + identifier); - - // Retrieve and process this record with a separate GetRecord call: - MutableBoolean getRecordErrorOccurred = new MutableBoolean(false); - Long datasetId = getRecord(dataverseRequest, hdLogger, harvestingClient, identifier, metadataPrefix, getRecordErrorOccurred, processedSizeThisBatch); - if (datasetId != null) { - harvestedDatasetIds.add(datasetId); - } - if (getRecordErrorOccurred.booleanValue() == true) { - failedIdentifiers.add(identifier); - } - - if ( harvestedDatasetIdsThisBatch == null ) { - harvestedDatasetIdsThisBatch = new ArrayList(); - } - harvestedDatasetIdsThisBatch.add(datasetId); - - // reindexing in batches? - this is from DVN 3; - // we may not need it anymore. - if ( processedSizeThisBatch > INDEXING_CONTENT_BATCH_SIZE ) { - - hdLogger.log(Level.INFO, "REACHED CONTENT BATCH SIZE LIMIT; calling index ("+ harvestedDatasetIdsThisBatch.size()+" datasets in the batch)."); - //indexService.updateIndexList(this.harvestedDatasetIdsThisBatch); - hdLogger.log(Level.INFO, "REINDEX DONE."); - - - processedSizeThisBatch = 0L; - harvestedDatasetIdsThisBatch = null; - } - - } - } catch (BadArgumentException e) { - throw new IOException("Incorrectly formatted OAI parameter", e); - } - - hdLogger.log(Level.INFO, "COMPLETED HARVEST, oaiUrl=" + baseOaiUrl + ",set=" + set + ", metadataPrefix=" + metadataPrefix + ", from=" + fromDate); - - return harvestedDatasetIds; - - } - - private ServiceProvider getServiceProvider(String baseOaiUrl, Granularity oaiGranularity) { - Context context = new Context(); - - context.withBaseUrl(baseOaiUrl); - context.withGranularity(oaiGranularity); - context.withOAIClient(new HttpOAIClient(baseOaiUrl)); - - ServiceProvider serviceProvider = new ServiceProvider(context); - return serviceProvider; - } - - /** - * Creates an XOAI parameters object for the ListIdentifiers call - * - * @param metadataPrefix - * @param set - * @param from - * @return ListIdentifiersParameters - */ - private ListIdentifiersParameters buildParams(String metadataPrefix, String set, Date from) { - ListIdentifiersParameters mip = ListIdentifiersParameters.request(); - mip.withMetadataPrefix(metadataPrefix); - - if (from != null) { - mip.withFrom(from); - } - - if (set != null) { - mip.withSetSpec(set); - } - return mip; - } - - - @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) - public Long getRecord(DataverseRequest dataverseRequest, Logger hdLogger, HarvestingClient harvestingClient, String identifier, String metadataPrefix, MutableBoolean recordErrorOccurred, Long processedSizeThisBatch) { - String errMessage = null; - Dataset harvestedDataset = null; - String oaiUrl = harvestingClient.getHarvestingUrl(); - Dataverse parentDataverse = harvestingClient.getDataverse(); - - try { - hdLogger.log(Level.INFO, "Calling GetRecord: oaiUrl =" + oaiUrl + "?verb=GetRecord&identifier=" + identifier + "&metadataPrefix=" + metadataPrefix); - - FastGetRecord record = new FastGetRecord(oaiUrl, identifier, metadataPrefix); - errMessage = record.getErrorMessage(); - - if (errMessage != null) { - hdLogger.log(Level.SEVERE, "Error calling GetRecord - " + errMessage); - } else if (record.isDeleted()) { - hdLogger.log(Level.INFO, "Received 'deleted' status from OAI Server."); - Dataset dataset = null; //TODO: !!! 
datasetService.getDatasetByHarvestInfo(dataverse, identifier); - if (dataset != null) { - hdLogger.log(Level.INFO, "Deleting study " + dataset.getGlobalId()); - // TODO: !!! datasetService.deleteDataset(dataset.getId()); - } else { - hdLogger.log(Level.INFO, "No study found for this record, skipping delete. "); - } - - } else { - hdLogger.log(Level.INFO, "Successfully retrieved GetRecord response."); - - harvestedDataset = importService.doImportHarvestedDataset(dataverseRequest, parentDataverse, metadataPrefix, record.getMetadataFile(), null); - - hdLogger.log(Level.INFO, "Harvest Successful for identifier " + identifier); - - processedSizeThisBatch += record.getMetadataFile().length(); - } - } catch (Throwable e) { - errMessage = "Exception processing getRecord(), oaiUrl=" + oaiUrl + ",identifier=" + identifier + " " + e.getClass().getName() + " " + e.getMessage(); - hdLogger.log(Level.SEVERE, errMessage); - logException(e, hdLogger); - - } - - // If we got an Error from the OAI server or an exception happened during import, then - // set recordErrorOccurred to true (if recordErrorOccurred is being used) - // otherwise throw an exception (if recordErrorOccurred is not used, i.e null) - - if (errMessage != null) { - if (recordErrorOccurred != null) { - recordErrorOccurred.setValue(true); - } else { - throw new EJBException(errMessage); - } - } - - return harvestedDataset != null ? harvestedDataset.getId() : null; - } - - private void logException(Throwable e, Logger logger) { - - boolean cause = false; - String fullMessage = ""; - do { - String message = e.getClass().getName() + " " + e.getMessage(); - if (cause) { - message = "\nCaused By Exception.................... " + e.getClass().getName() + " " + e.getMessage(); - } - StackTraceElement[] ste = e.getStackTrace(); - message += "\nStackTrace: \n"; - for (int m = 0; m < ste.length; m++) { - message += ste[m].toString() + "\n"; - } - fullMessage += message; - cause = true; - } while ((e = e.getCause()) != null); - logger.severe(fullMessage); - } - -} diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java deleted file mode 100644 index 2bf32098dbc..00000000000 --- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClient.java +++ /dev/null @@ -1,486 +0,0 @@ -/* - * To change this license header, choose License Headers in Project Properties. - * To change this template file, choose Tools | Templates - * and open the template in the editor. 
- */ -package edu.harvard.iq.dataverse.harvest.client; - -import edu.harvard.iq.dataverse.Dataverse; -import java.io.Serializable; -import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.Date; -import java.util.GregorianCalendar; -import java.util.List; -import javax.persistence.CascadeType; -import javax.persistence.Column; -import javax.persistence.Entity; -import javax.persistence.GeneratedValue; -import javax.persistence.GenerationType; -import javax.persistence.Id; -import javax.persistence.Index; -import javax.persistence.JoinColumn; -import javax.persistence.NamedQueries; -import javax.persistence.NamedQuery; -import javax.persistence.OneToMany; -import javax.persistence.OneToOne; -import javax.persistence.OrderBy; -import javax.persistence.Table; -import javax.persistence.Temporal; -import javax.persistence.TemporalType; -import javax.validation.constraints.Pattern; -import javax.validation.constraints.Size; -import org.hibernate.validator.constraints.NotBlank; - -/** - * - * @author Leonid Andreev - */ - -@Table(indexes = {@Index(columnList="dataverse_id") - , @Index(columnList="harvesttype") - , @Index(columnList="harveststyle") - , @Index(columnList="harvestingurl")}) -@Entity -@NamedQueries({ - @NamedQuery(name = "HarvestingClient.findByNickname", query="SELECT hc FROM HarvestingClient hc WHERE LOWER(hc.name)=:nickName") -}) -public class HarvestingClient implements Serializable { - private static final long serialVersionUID = 1L; - - @Id - @GeneratedValue(strategy = GenerationType.IDENTITY) - private Long id; - - public Long getId() { - return id; - } - - public void setId(Long id) { - this.id = id; - } - - public static final String HARVEST_TYPE_OAI="oai"; - public static final String HARVEST_TYPE_NESSTAR="nesstar"; - - public static final String HARVEST_STYLE_DATAVERSE="dataverse"; - // pre-4.0 remote Dataverse: - public static final String HARVEST_STYLE_VDC="vdc"; - public static final String HARVEST_STYLE_ICPSR="icpsr"; - public static final String HARVEST_STYLE_NESSTAR="nesstar"; - public static final String HARVEST_STYLE_ROPER="roper"; - public static final String HARVEST_STYLE_HGL="hgl"; - public static final String HARVEST_STYLE_DEFAULT="default"; - - public static final String REMOTE_ARCHIVE_URL_LEVEL_DATAVERSE="dataverse"; - public static final String REMOTE_ARCHIVE_URL_LEVEL_DATASET="dataset"; - public static final String REMOTE_ARCHIVE_URL_LEVEL_FILE="file"; - - public static final String SCHEDULE_PERIOD_DAILY="daily"; - public static final String SCHEDULE_PERIOD_WEEKLY="weekly"; - - public HarvestingClient() { - this.harvestType = HARVEST_TYPE_OAI; // default harvestType - this.harvestStyle = HARVEST_STYLE_DATAVERSE; // default harvestStyle - } - - - @OneToOne (cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST }) - @JoinColumn(name="dataverse_id") - private Dataverse dataverse; - - public Dataverse getDataverse() { - return this.dataverse; - } - - public void setDataverse(Dataverse dataverse) { - this.dataverse = dataverse; - } - - @NotBlank(message = "Please enter a nickname.") - @Column(nullable = false, unique=true) - @Size(max = 30, message = "Nickname must be at most 30 characters.") - @Pattern.List({@Pattern(regexp = "[a-zA-Z0-9\\_\\-]*", message = "Found an illegal character(s). 
Valid characters are a-Z, 0-9, '_', and '-'."), - @Pattern(regexp=".*\\D.*", message="Nickname should not be a number")}) - private String name; - - public String getName() { - return name; - } - - public void setName(String name) { - this.name = name; - } - - private String harvestType; - - public String getHarvestType() { - return harvestType; - } - - public void setHarvestType(String harvestType) { - this.harvestType = harvestType; - } - - - public boolean isOai() { - return HARVEST_TYPE_OAI.equals(harvestType); - } - - private String harvestStyle; - - public String getHarvestStyle() { - return harvestStyle; - } - - public void setHarvestStyle(String harvestStyle) { - this.harvestStyle = harvestStyle; - } - - private String harvestingUrl; - - public String getHarvestingUrl() { - return this.harvestingUrl; - } - - public void setHarvestingUrl(String harvestingUrl) { - this.harvestingUrl = harvestingUrl.trim(); - } - - private String archiveUrl; - - public String getArchiveUrl() { - return this.archiveUrl; - } - - public void setArchiveUrl(String archiveUrl) { - this.archiveUrl = archiveUrl; - } - - @Column(columnDefinition="TEXT") - private String archiveDescription; - - public String getArchiveDescription() { - return this.archiveDescription; - } - - public void setArchiveDescription(String archiveDescription) { - this.archiveDescription = archiveDescription; - } - - private String harvestingSet; - - public String getHarvestingSet() { - return this.harvestingSet; - } - - public void setHarvestingSet(String harvestingSet) { - this.harvestingSet = harvestingSet; - } - - private String metadataPrefix; - - public String getMetadataPrefix() { - return metadataPrefix; - } - - public void setMetadataPrefix(String metadataPrefix) { - this.metadataPrefix = metadataPrefix; - } - - // TODO: do we need "orphanRemoval=true"? -- L.A. 4.4 - // TODO: should it be @OrderBy("startTime")? -- L.A. 
4.4 - @OneToMany(mappedBy="harvestingClient", cascade={CascadeType.REMOVE, CascadeType.MERGE, CascadeType.PERSIST}) - @OrderBy("id") - private List harvestHistory; - - List getRunHistory() { - return harvestHistory; - } - - void setRunHistory(List harvestHistory) { - this.harvestHistory = harvestHistory; - } - - public String getLastResult() { - if (harvestHistory == null || harvestHistory.size() == 0) { - return null; - } - return harvestHistory.get(harvestHistory.size() - 1).getResultLabel(); - } - - public ClientHarvestRun getLastRun() { - if (harvestHistory == null || harvestHistory.size() == 0) { - return null; - } - - return harvestHistory.get(harvestHistory.size() - 1); - } - - public ClientHarvestRun getLastSuccessfulRun() { - if (harvestHistory == null || harvestHistory.size() == 0) { - return null; - } - - int i = harvestHistory.size() - 1; - - while (i > -1) { - if (harvestHistory.get(i).isSuccess()) { - return harvestHistory.get(i); - } - i--; - } - - return null; - } - - ClientHarvestRun getLastNonEmptyRun() { - if (harvestHistory == null || harvestHistory.size() == 0) { - return null; - } - - int i = harvestHistory.size() - 1; - - while (i > -1) { - if (harvestHistory.get(i).isSuccess()) { - if (harvestHistory.get(i).getHarvestedDatasetCount().longValue() > 0 || - harvestHistory.get(i).getDeletedDatasetCount().longValue() > 0) { - return harvestHistory.get(i); - } - } - i--; - } - return null; - } - - public Date getLastHarvestTime() { - ClientHarvestRun lastHarvest = getLastRun(); - if ( lastHarvest != null) { - return lastHarvest.getStartTime(); - } - return null; - } - - public Date getLastSuccessfulHarvestTime() { - ClientHarvestRun lastSuccessfulHarvest = getLastSuccessfulRun(); - if ( lastSuccessfulHarvest != null) { - return lastSuccessfulHarvest.getStartTime(); - } - return null; - } - - public Date getLastNonEmptyHarvestTime() { - ClientHarvestRun lastNonEmptyHarvest = getLastNonEmptyRun(); - if ( lastNonEmptyHarvest != null) { - return lastNonEmptyHarvest.getStartTime(); - } - return null; - } - - public Long getLastHarvestedDatasetCount() { - ClientHarvestRun lastNonEmptyHarvest = getLastNonEmptyRun(); - if ( lastNonEmptyHarvest != null) { - return lastNonEmptyHarvest.getHarvestedDatasetCount(); - } - return null; - } - - public Long getLastFailedDatasetCount() { - ClientHarvestRun lastNonEmptyHarvest = getLastNonEmptyRun(); - if ( lastNonEmptyHarvest != null) { - return lastNonEmptyHarvest.getFailedDatasetCount(); - } - return null; - } - - public Long getLastDeletedDatasetCount() { - ClientHarvestRun lastNonEmptyHarvest = getLastNonEmptyRun(); - if ( lastNonEmptyHarvest != null) { - return lastNonEmptyHarvest.getDeletedDatasetCount(); - } - return null; - } - - /* move the fields below to the new HarvestingClientRun class: - private String harvestResult; - - public String getResult() { - return harvestResult; - } - - public void setResult(String harvestResult) { - this.harvestResult = harvestResult; - } - - // "Last Harvest Time" is the last time we *attempted* to harvest - // from this remote resource. - // It wasn't necessarily a successful attempt! - - @Temporal(value = TemporalType.TIMESTAMP) - private Date lastHarvestTime; - - public Date getLastHarvestTime() { - return lastHarvestTime; - } - - public void setLastHarvestTime(Date lastHarvestTime) { - this.lastHarvestTime = lastHarvestTime; - } - - // This is the last "successful harvest" - i.e., the last time we - // tried to harvest, and got a response from the remote server. 
-    // We may not have necessarily harvested any useful content though;
-    // the result may have been a "no content" or "no changes since the last harvest"
-    // response.
-
-    @Temporal(value = TemporalType.TIMESTAMP)
-    private Date lastSuccessfulHarvestTime;
-
-    public Date getLastSuccessfulHarvestTime() {
-        return lastSuccessfulHarvestTime;
-    }
-
-    public void setLastSuccessfulHarvestTime(Date lastSuccessfulHarvestTime) {
-        this.lastSuccessfulHarvestTime = lastSuccessfulHarvestTime;
-    }
-
-    // Finally, this is the time stamp from the last "non-empty" harvest.
-    // I.e. the last time we ran a harvest that actually resulted in
-    // some Datasets created, updated or deleted:
-
-    @Temporal(value = TemporalType.TIMESTAMP)
-    private Date lastNonEmptyHarvestTime;
-
-    public Date getLastNonEmptyHarvestTime() {
-        return lastNonEmptyHarvestTime;
-    }
-
-    public void setLastNonEmptyHarvestTime(Date lastNonEmptyHarvestTime) {
-        this.lastNonEmptyHarvestTime = lastNonEmptyHarvestTime;
-    }
-
-    // And these are the Dataset counts from that last "non-empty" harvest:
-    private Long harvestedDatasetCount;
-    private Long failedDatasetCount;
-    private Long deletedDatasetCount;
-
-    public Long getLastHarvestedDatasetCount() {
-        return harvestedDatasetCount;
-    }
-
-    public void setHarvestedDatasetCount(Long harvestedDatasetCount) {
-        this.harvestedDatasetCount = harvestedDatasetCount;
-    }
-
-    public Long getLastFailedDatasetCount() {
-        return failedDatasetCount;
-    }
-
-    public void setFailedDatasetCount(Long failedDatasetCount) {
-        this.failedDatasetCount = failedDatasetCount;
-    }
-
-    public Long getLastDeletedDatasetCount() {
-        return deletedDatasetCount;
-    }
-
-    public void setDeletedDatasetCount(Long deletedDatasetCount) {
-        this.deletedDatasetCount = deletedDatasetCount;
-    }
-    */
-
-    private boolean scheduled;
-
-    public boolean isScheduled() {
-        return this.scheduled;
-    }
-
-    public void setScheduled(boolean scheduled) {
-        this.scheduled = scheduled;
-    }
-
-    private String schedulePeriod;
-
-    public String getSchedulePeriod() {
-        return schedulePeriod;
-    }
-
-    public void setSchedulePeriod(String schedulePeriod) {
-        this.schedulePeriod = schedulePeriod;
-    }
-
-    private Integer scheduleHourOfDay;
-
-    public Integer getScheduleHourOfDay() {
-        return scheduleHourOfDay;
-    }
-
-    public void setScheduleHourOfDay(Integer scheduleHourOfDay) {
-        this.scheduleHourOfDay = scheduleHourOfDay;
-    }
-
-    private Integer scheduleDayOfWeek;
-
-    public Integer getScheduleDayOfWeek() {
-        return scheduleDayOfWeek;
-    }
-
-    public void setScheduleDayOfWeek(Integer scheduleDayOfWeek) {
-        this.scheduleDayOfWeek = scheduleDayOfWeek;
-    }
-
-    public String getScheduleDescription() {
-        Date date = new Date();
-        Calendar cal = new GregorianCalendar();
-        cal.setTime(date);
-        SimpleDateFormat weeklyFormat = new SimpleDateFormat(" E h a ");
-        SimpleDateFormat dailyFormat = new SimpleDateFormat(" h a ");
-        String desc = "Not Scheduled";
-        if (schedulePeriod != null && !schedulePeriod.isEmpty()) {
-            cal.set(Calendar.HOUR_OF_DAY, scheduleHourOfDay);
-            if (schedulePeriod.equals(SCHEDULE_PERIOD_WEEKLY)) {
-                cal.set(Calendar.DAY_OF_WEEK, scheduleDayOfWeek);
-                desc = "Weekly, " + weeklyFormat.format(cal.getTime());
-            } else {
-                desc = "Daily, " + dailyFormat.format(cal.getTime());
-            }
-        }
-        return desc;
-    }
-
-    private boolean harvestingNow;
-
-    public boolean isHarvestingNow() {
-        return this.harvestingNow;
-    }
-
-    public void setHarvestingNow(boolean harvestingNow) {
-        this.harvestingNow = harvestingNow;
-    }
-
-    @Override
-    public int hashCode() {
-        int hash = 0;
-        hash += (id != null ? id.hashCode() : 0);
-        return hash;
-    }
-
-    @Override
-    public boolean equals(Object object) {
-        // TODO: Warning - this method won't work in the case the id fields are not set
-        if (!(object instanceof HarvestingClient)) {
-            return false;
-        }
-        HarvestingClient other = (HarvestingClient) object;
-        if ((this.id == null && other.id != null) || (this.id != null && !this.id.equals(other.id))) {
-            return false;
-        }
-        return true;
-    }
-
-    @Override
-    public String toString() {
-        return "edu.harvard.iq.dataverse.HarvestingDataverse[ id=" + id + " ]";
-    }
-
-}
diff --git a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java
deleted file mode 100644
index 74634399f5b..00000000000
--- a/src/main/java/edu/harvard/iq/dataverse/harvest/client/HarvestingClientServiceBean.java
+++ /dev/null
@@ -1,143 +0,0 @@
-package edu.harvard.iq.dataverse.harvest.client;
-
-import edu.harvard.iq.dataverse.Dataverse;
-import edu.harvard.iq.dataverse.DataverseServiceBean;
-import java.util.ArrayList;
-import java.util.Date;
-import java.util.List;
-import java.util.logging.Logger;
-import javax.ejb.EJB;
-import javax.ejb.Stateless;
-import javax.ejb.TransactionAttribute;
-import javax.ejb.TransactionAttributeType;
-import javax.faces.bean.ManagedBean;
-import javax.inject.Named;
-import javax.persistence.EntityManager;
-import javax.persistence.NoResultException;
-import javax.persistence.NonUniqueResultException;
-import javax.persistence.PersistenceContext;
-
-/**
- *
- * @author Leonid Andreev
- *
- * Dedicated service for managing Harvesting Client Configurations
- */
-@Stateless
-@Named
-//@ManagedBean
-public class HarvestingClientServiceBean {
-    @EJB
-    DataverseServiceBean dataverseService;
-
-    @PersistenceContext(unitName = "VDCNet-ejbPU")
-    private EntityManager em;
-
-    private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.harvest.client.HarvestingClientServiceBean");
-
-    public HarvestingClient find(Object pk) {
-        return (HarvestingClient) em.find(HarvestingClient.class, pk);
-    }
-
-    public HarvestingClient findByNickname(String nickName) {
-        try {
-            return em.createNamedQuery("HarvestingClient.findByNickname", HarvestingClient.class)
-                    .setParameter("nickName", nickName.toLowerCase())
-                    .getSingleResult();
-        } catch ( NoResultException|NonUniqueResultException ex ) {
-            logger.fine("Unable to find a single harvesting client by nickname \"" + nickName + "\": " + ex);
-            return null;
-        }
-    }
-
-    public List<HarvestingClient> getAllHarvestingClients() {
-        try {
-            return em.createQuery("SELECT object(c) FROM HarvestingClient AS c ORDER BY c.id", HarvestingClient.class).getResultList();
-        } catch (Exception ex) {
-            logger.warning("Unknown exception caught while looking up configured Harvesting Clients: " + ex.getMessage());
-        }
-        return null;
-    }
-
-    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
-    public void resetHarvestInProgress(Long hdId) {
-        Dataverse hd = em.find(Dataverse.class, hdId);
-        em.refresh(hd);
-        if (!hd.isHarvested()) {
-            return;
-        }
-        hd.getHarvestingClientConfig().setHarvestingNow(false);
-
-        // And if there is an unfinished RunResult object, we'll
-        // just mark it as a failure:
-        if (hd.getHarvestingClientConfig().getLastRun() != null
-                && hd.getHarvestingClientConfig().getLastRun().isInProgress()) {
-            hd.getHarvestingClientConfig().getLastRun().setFailed();
-        }
-
-    }
-
-    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
-    public void setHarvestInProgress(Long hdId, Date startTime) {
-        Dataverse hd = em.find(Dataverse.class, hdId);
-        em.refresh(hd);
-        HarvestingClient harvestingClient = hd.getHarvestingClientConfig();
-        if (harvestingClient == null) {
-            return;
-        }
-        harvestingClient.setHarvestingNow(true);
-        if (harvestingClient.getRunHistory() == null) {
-            harvestingClient.setRunHistory(new ArrayList<>());
-        }
-        ClientHarvestRun currentRun = new ClientHarvestRun();
-        currentRun.setHarvestingClient(harvestingClient);
-        currentRun.setStartTime(startTime);
-        currentRun.setInProgress();
-        harvestingClient.getRunHistory().add(currentRun);
-    }
-
-
-    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
-    public void setHarvestSuccess(Long hdId, Date currentTime, int harvestedCount, int failedCount) {
-        Dataverse hd = em.find(Dataverse.class, hdId);
-        em.refresh(hd);
-        HarvestingClient harvestingClient = hd.getHarvestingClientConfig();
-        if (harvestingClient == null) {
-            return;
-        }
-
-        ClientHarvestRun currentRun = harvestingClient.getLastRun();
-
-        if (currentRun != null && currentRun.isInProgress()) {
-            // TODO: what if there's no current run in progress? should we just
-            // give up quietly, or should we make a noise of some kind? -- L.A. 4.4
-
-            currentRun.setSuccess();
-            currentRun.setFinishTime(currentTime);
-            currentRun.setHarvestedDatasetCount(Long.valueOf(harvestedCount));
-            currentRun.setFailedDatasetCount(Long.valueOf(failedCount));
-
-            /*TODO: still need to record the number of deleted datasets! */
-        }
-    }
-
-    @TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
-    public void setHarvestFailure(Long hdId, Date currentTime) {
-        Dataverse hd = em.find(Dataverse.class, hdId);
-        em.refresh(hd);
-        HarvestingClient harvestingClient = hd.getHarvestingClientConfig();
-        if (harvestingClient == null) {
-            return;
-        }
-
-        ClientHarvestRun currentRun = harvestingClient.getLastRun();
-
-        if (currentRun != null && currentRun.isInProgress()) {
-            // TODO: what if there's no current run in progress? should we just
-            // give up quietly, or should we make a noise of some kind? -- L.A. 4.4
-
-            currentRun.setFailed();
-            currentRun.setFinishTime(currentTime);
-        }
-    }
-}
diff --git a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java b/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java
deleted file mode 100644
index 49bac288008..00000000000
--- a/src/main/java/edu/harvard/iq/dataverse/timer/DataverseTimerServiceBean.java
+++ /dev/null
@@ -1,252 +0,0 @@
-/*
- * To change this license header, choose License Headers in Project Properties.
- * To change this template file, choose Tools | Templates
- * and open the template in the editor.
- */ -package edu.harvard.iq.dataverse.timer; - -import edu.harvard.iq.dataverse.Dataverse; -import edu.harvard.iq.dataverse.DataverseServiceBean; -import edu.harvard.iq.dataverse.authorization.AuthenticationServiceBean; -import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUser; -import edu.harvard.iq.dataverse.authorization.providers.builtin.BuiltinUserServiceBean; -import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser; -import edu.harvard.iq.dataverse.engine.command.DataverseRequest; -import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; -import edu.harvard.iq.dataverse.harvest.client.HarvestTimerInfo; -import edu.harvard.iq.dataverse.harvest.client.HarvesterServiceBean; -import edu.harvard.iq.dataverse.harvest.client.HarvestingClientServiceBean; -import java.io.Serializable; -import java.net.InetAddress; -import java.net.UnknownHostException; -import java.util.Calendar; -import java.util.Date; -import java.util.Iterator; -import java.util.logging.Level; -import java.util.logging.Logger; -import javax.annotation.Resource; -import javax.ejb.EJB; -import javax.ejb.Stateless; -import javax.ejb.Timeout; -import javax.ejb.Timer; -import javax.ejb.TransactionAttribute; -import javax.ejb.TransactionAttributeType; -import javax.persistence.EntityManager; -import javax.persistence.PersistenceContext; - - -/** - * - * @author roberttreacy - */ -@Stateless -public class DataverseTimerServiceBean implements Serializable { - @Resource - javax.ejb.TimerService timerService; - @PersistenceContext(unitName = "VDCNet-ejbPU") - private EntityManager em; - private static final Logger logger = Logger.getLogger("edu.harvard.iq.dataverse.timer.DataverseTimerServiceBean"); - @EJB - HarvesterServiceBean harvesterService; - @EJB - DataverseServiceBean dataverseService; - @EJB - HarvestingClientServiceBean harvestingClientService; - @EJB - AuthenticationServiceBean authSvc; - - /*@EJB - StudyServiceLocal studyService;*/ - - - public void createTimer(Date initialExpiration, long intervalDuration, Serializable info) { - try { - logger.log(Level.INFO,"Creating timer on " + InetAddress.getLocalHost().getCanonicalHostName()); - } catch (UnknownHostException ex) { - Logger.getLogger(DataverseTimerServiceBean.class.getName()).log(Level.SEVERE, null, ex); - } - timerService.createTimer(initialExpiration, intervalDuration, info); - } - - - /** - * This method is called whenever an EJB Timer goes off. - * Check to see if this is a Harvest Timer, and if it is - * Run the harvest for the given (scheduled) dataverse - * @param timer - */ - @Timeout - @TransactionAttribute(TransactionAttributeType.NOT_SUPPORTED) - public void handleTimeout(javax.ejb.Timer timer) { - // We have to put all the code in a try/catch block because - // if an exception is thrown from this method, Glassfish will automatically - // call the method a second time. (The minimum number of re-tries for a Timer method is 1) - - try { - logger.log(Level.INFO,"Handling timeout on " + InetAddress.getLocalHost().getCanonicalHostName()); - } catch (UnknownHostException ex) { - Logger.getLogger(DataverseTimerServiceBean.class.getName()).log(Level.SEVERE, null, ex); - } - if (timer.getInfo() instanceof HarvestTimerInfo) { - HarvestTimerInfo info = (HarvestTimerInfo) timer.getInfo(); - try { - - logger.log(Level.INFO, "running a harvesting client: id=" + info.getHarvestingClientId()); - // Timer batch jobs are run by the main Admin user. - // TODO: revisit how we retrieve the superuser here. 
- // (looking it up by the identifier "admin" is not necessarily the - // cleanest way). Should it be configurable somewhere, which superuser - // runs these jobs? Should there be a central mechanism for obtaining - // the "major", builtin superuser for this Dataverse instance? - // -- L.A. 4.4, May 8 2016 - DataverseRequest dataverseRequest = null; - AuthenticatedUser adminUser = authSvc.getAuthenticatedUser("admin"); - if (adminUser != null) { - dataverseRequest = new DataverseRequest(adminUser, null); - } - // TODO: create a real DataverseRequest here, associated with the main admin user (?) - // -- L.A. 4.4, May 8 2016 - harvesterService.doHarvest(dataverseRequest, info.getHarvestingClientId()); - - } catch (Throwable e) { - // Harvester Service should be handling any error notifications, - // if/when things go wrong. - // (TODO: -- verify this logic; harvesterService may still be able - // to throw an IOException, if it could not run the harvest at all, - // or could not for whatever reason modify the database record... - // in this case we should, probably, log the error and try to send - // a mail notification. -- L.A. 4.4) - //dataverseService.setHarvestResult(info.getHarvestingDataverseId(), harvesterService.HARVEST_RESULT_FAILED); - //mailService.sendHarvestErrorNotification(dataverseService.find().getSystemEmail(), dataverseService.find().getName()); - logException(e, logger); - } - } - /* Export timers: (not yet implemented!) -- L.A. - if (timer.getInfo() instanceof ExportTimerInfo) { - try { - ExportTimerInfo info = (ExportTimerInfo) timer.getInfo(); - logger.info("handling timeout"); - studyService.exportUpdatedStudies(); - } catch (Throwable e) { - mailService.sendExportErrorNotification(vdcNetworkService.find().getSystemEmail(), vdcNetworkService.find().getName()); - logException(e, logger); - } - } - */ - - } - - public void removeHarvestTimers() { - // Remove all the harvest timers, if exist: - // - // (the logging messages below are set to level INFO; it's ok, - // since this code is only called on startup of the application, - // and it may be useful to know what existing timers were encountered). 
- - logger.log(Level.INFO,"Removing existing harvest timers.."); - - int i = 1; - for (Iterator it = timerService.getTimers().iterator(); it.hasNext();) { - - Timer timer = (Timer) it.next(); - logger.log(Level.INFO, "HarvesterService: checking timer "+i); - - if (timer.getInfo() instanceof HarvestTimerInfo) { - logger.log(Level.INFO, "HarvesterService: timer "+i+" is a harvesting one; removing."); - timer.cancel(); - } - - i++; - } - } - - public void createHarvestTimer(HarvestingClient harvestingClient) { - - if (harvestingClient.isScheduled()) { - long intervalDuration = 0; - Calendar initExpiration = Calendar.getInstance(); - initExpiration.set(Calendar.MINUTE, 0); - initExpiration.set(Calendar.SECOND, 0); - if (harvestingClient.getSchedulePeriod().equals(HarvestingClient.SCHEDULE_PERIOD_DAILY)) { - intervalDuration = 1000 * 60 * 60 * 24; - initExpiration.set(Calendar.HOUR_OF_DAY, harvestingClient.getScheduleHourOfDay()); - - } else if (harvestingClient.getSchedulePeriod().equals(harvestingClient.SCHEDULE_PERIOD_WEEKLY)) { - intervalDuration = 1000 * 60 * 60 * 24 * 7; - initExpiration.set(Calendar.HOUR_OF_DAY, harvestingClient.getScheduleHourOfDay()); - initExpiration.set(Calendar.DAY_OF_WEEK, harvestingClient.getScheduleDayOfWeek()); - - } else { - logger.log(Level.WARNING, "Could not set timer for harvesting client id=" + harvestingClient.getId() + ", unknown schedule period: " + harvestingClient.getSchedulePeriod()); - return; - } - Date initExpirationDate = initExpiration.getTime(); - Date currTime = new Date(); - if (initExpirationDate.before(currTime)) { - initExpirationDate.setTime(initExpiration.getTimeInMillis() + intervalDuration); - } - logger.log(Level.INFO, "Setting timer for harvesting client " + harvestingClient.getName() + ", initial expiration: " + initExpirationDate); - createTimer(initExpirationDate, intervalDuration, new HarvestTimerInfo(harvestingClient.getId(), harvestingClient.getName(), harvestingClient.getSchedulePeriod(), harvestingClient.getScheduleHourOfDay(), harvestingClient.getScheduleDayOfWeek())); - } - } - - public void updateHarvestTimer(HarvestingClient harvestingClient) { - removeHarvestTimer(harvestingClient); - createHarvestTimer(harvestingClient); - } - - - public void removeHarvestTimer(HarvestingClient harvestingClient) { - // Clear dataverse timer, if one exists - try { - logger.log(Level.INFO,"Removing harvest timer on " + InetAddress.getLocalHost().getCanonicalHostName()); - } catch (UnknownHostException ex) { - Logger.getLogger(DataverseTimerServiceBean.class.getName()).log(Level.SEVERE, null, ex); - } - for (Iterator it = timerService.getTimers().iterator(); it.hasNext();) { - Timer timer = (Timer) it.next(); - if (timer.getInfo() instanceof HarvestTimerInfo) { - HarvestTimerInfo info = (HarvestTimerInfo) timer.getInfo(); - if (info.getHarvestingClientId().equals(harvestingClient.getId())) { - timer.cancel(); - } - } - } - } - - public void createExportTimer() { - /* Not yet implemented. The DVN 3 implementation can be used as a model */ - - } - - public void createExportTimer(Dataverse dataverse) { - /* Not yet implemented. The DVN 3 implementation can be used as a model */ - - } - - public void removeExportTimer() { - /* Not yet implemented. 
The DVN 3 implementation can be used as a model */ - } - - /* Utility methods: */ - private void logException(Throwable e, Logger logger) { - - boolean cause = false; - String fullMessage = ""; - do { - String message = e.getClass().getName() + " " + e.getMessage(); - if (cause) { - message = "\nCaused By Exception.................... " + e.getClass().getName() + " " + e.getMessage(); - } - StackTraceElement[] ste = e.getStackTrace(); - message += "\nStackTrace: \n"; - for (int m = 0; m < ste.length; m++) { - message += ste[m].toString() + "\n"; - } - fullMessage += message; - cause = true; - } while ((e = e.getCause()) != null); - logger.severe(fullMessage); - } - -} \ No newline at end of file diff --git a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java index 4b77feb24ff..ebbe1117c3b 100644 --- a/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java +++ b/src/main/java/edu/harvard/iq/dataverse/util/json/JsonParser.java @@ -19,7 +19,6 @@ import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.IpGroup; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddress; import edu.harvard.iq.dataverse.authorization.groups.impl.ipaddress.ip.IpAddressRange; -import edu.harvard.iq.dataverse.harvest.client.HarvestingClient; import edu.harvard.iq.dataverse.settings.SettingsServiceBean; import java.io.StringReader; import java.text.ParseException; @@ -541,19 +540,5 @@ Long parseLong(String str) throws NumberFormatException { int parsePrimitiveInt(String str, int defaultValue) { return str == null ? defaultValue : Integer.parseInt(str); } - - public String parseHarvestingClient(JsonObject obj, HarvestingClient harvestingClient) throws JsonParseException { - - String dataverseAlias = obj.getString("dataverseAlias",null); - - harvestingClient.setName(obj.getString("nickName",null)); - harvestingClient.setHarvestType(obj.getString("type",null)); - harvestingClient.setHarvestingUrl(obj.getString("harvestUrl",null)); - harvestingClient.setArchiveUrl(obj.getString("archiveUrl",null)); - harvestingClient.setArchiveDescription(obj.getString("archiveDescription")); - harvestingClient.setMetadataPrefix(obj.getString("metadataFormat",null)); - harvestingClient.setHarvestingSet(obj.getString("set",null)); - - return dataverseAlias; - } + } From 13c89edc1b60a9c3c6234d5bedc020c986235602 Mon Sep 17 00:00:00 2001 From: Leonid Andreev Date: Wed, 11 May 2016 17:58:31 -0400 Subject: [PATCH 34/37] Trying again, with the infamous CORS header patch. 
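
The header is set only on the successful return path of /api/search; the
earlier error returns are left untouched. If we ever want the header on
every API response instead, one possible alternative is a JAX-RS 2.0
response filter. A minimal sketch, assuming the provider gets registered
with the API's JAX-RS Application (the class name CorsResponseFilter is
made up for illustration and is NOT part of this commit):

    package edu.harvard.iq.dataverse.api;

    import java.io.IOException;
    import javax.ws.rs.container.ContainerRequestContext;
    import javax.ws.rs.container.ContainerResponseContext;
    import javax.ws.rs.container.ContainerResponseFilter;
    import javax.ws.rs.ext.Provider;

    // Hypothetical sketch only: would apply the CORS header to all API
    // responses, rather than just the search endpoint patched below.
    @Provider
    public class CorsResponseFilter implements ContainerResponseFilter {

        @Override
        public void filter(ContainerRequestContext requestContext,
                ContainerResponseContext responseContext) throws IOException {
            // Allow browser clients from any origin to read the response.
            responseContext.getHeaders().add("Access-Control-Allow-Origin", "*");
        }
    }

For now, keeping the change scoped to Search.java makes it minimal and
easy to revert if the wide-open origin policy turns out to be a problem.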
--- src/main/java/edu/harvard/iq/dataverse/api/Search.java | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/main/java/edu/harvard/iq/dataverse/api/Search.java b/src/main/java/edu/harvard/iq/dataverse/api/Search.java index 46f94e1208c..c65084cedb7 100644 --- a/src/main/java/edu/harvard/iq/dataverse/api/Search.java +++ b/src/main/java/edu/harvard/iq/dataverse/api/Search.java @@ -27,9 +27,11 @@ import javax.json.Json; import javax.json.JsonArrayBuilder; import javax.json.JsonObjectBuilder; +import javax.servlet.http.HttpServletResponse; import javax.ws.rs.GET; import javax.ws.rs.Path; import javax.ws.rs.QueryParam; +import javax.ws.rs.core.Context; import javax.ws.rs.core.Response; import org.apache.commons.lang.StringUtils; @@ -65,7 +67,8 @@ public Response search( @QueryParam("fq") final List filterQueries, @QueryParam("show_entity_ids") boolean showEntityIds, @QueryParam("show_api_urls") boolean showApiUrls, - @QueryParam("show_my_data") boolean showMyData + @QueryParam("show_my_data") boolean showMyData, + @Context HttpServletResponse response ) { User user; @@ -182,6 +185,7 @@ public Response search( */ return errorResponse(Response.Status.BAD_REQUEST, solrQueryResponse.getError()); } + response.setHeader("Access-Control-Allow-Origin", "*"); return okResponse(value); } else { return errorResponse(Response.Status.BAD_REQUEST, "q parameter is missing"); From 3030ada6aecde7c09e8cdb5a3d75edba08358af7 Mon Sep 17 00:00:00 2001 From: Gustavo Durand Date: Wed, 11 May 2016 18:29:32 -0400 Subject: [PATCH 35/37] Update pom.xml --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index c8c46e87396..31c1f634327 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ edu.harvard.iq dataverse - 4.3 + 4.3.1 war dataverse From f645a423f6a97697f8b564910ef1264ff2df74a8 Mon Sep 17 00:00:00 2001 From: Gustavo Durand Date: Wed, 11 May 2016 18:32:28 -0400 Subject: [PATCH 36/37] Update conf.py --- doc/sphinx-guides/source/conf.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/conf.py b/doc/sphinx-guides/source/conf.py index dcd5c049808..5ab7e69b006 100755 --- a/doc/sphinx-guides/source/conf.py +++ b/doc/sphinx-guides/source/conf.py @@ -63,9 +63,9 @@ # built documents. # # The short X.Y version. -version = '4.3' +version = '4.3.1' # The full version, including alpha/beta/rc tags. -release = '4.3' +release = '4.3.1' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. From 246bc765b0e26ef76a91b0b01c32ee711fe2cadc Mon Sep 17 00:00:00 2001 From: Gustavo Durand Date: Wed, 11 May 2016 18:34:08 -0400 Subject: [PATCH 37/37] Update index.rst --- doc/sphinx-guides/source/index.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/sphinx-guides/source/index.rst b/doc/sphinx-guides/source/index.rst index db0353ea400..f820076ebd1 100755 --- a/doc/sphinx-guides/source/index.rst +++ b/doc/sphinx-guides/source/index.rst @@ -3,10 +3,10 @@ You can adapt this file completely to your liking, but it should at least contain the root `toctree` directive. -Dataverse 4.3 Guides +Dataverse 4.3.1 Guides ====================== -These guides are for the most recent version of Dataverse. For the guides for **version 4.2.4** please go `here `_. +These guides are for the most recent version of Dataverse. For the guides for **version 4.3** please go `here `_. .. toctree:: :glob: