'.$sArr['name'].': ';
}
if($depTraitId){
$innerStr .= $this->getTraitUnitString($depTraitId,$isCoded,trim($classStr.' child-'.$sid));
diff --git a/classes/OccurrenceCollectionProfile.php b/classes/OccurrenceCollectionProfile.php
index bced4ec329..f7eb880f67 100644
--- a/classes/OccurrenceCollectionProfile.php
+++ b/classes/OccurrenceCollectionProfile.php
@@ -135,14 +135,14 @@ public function getMetadataHtml($collArr, $LANG){
$outStr .= '
';
$outStr .= ''.$LANG['MANAGEMENT'].': ';
if($collArr['managementtype'] == 'Live Data'){
- $outStr .= 'Live Data managed directly within data portal';
+ $outStr .= (isset($LANG['LIVE_DATA'])?$LANG['LIVE_DATA']:'Live Data managed directly within data portal');
}
else{
if($collArr['managementtype'] == 'Aggregate'){
- $outStr .= 'Data harvested from a data aggregator';
+ $outStr .= (isset($LANG['DATA_AGGREGATE'])?$LANG['DATA_AGGREGATE']:'Data harvested from a data aggregator');
}
else{
- $outStr .= 'Data snapshot of local collection database ';
+ $outStr .= (isset($LANG['DATA_SNAPSHOT'])?$LANG['DATA_SNAPSHOT']:'Data snapshot of local collection database ');
}
$outStr .= '
';
?>
@@ -222,7 +222,7 @@ public function outputFullCollArr($collGrpArr, $targetCatID = '', $displayIcons
-
">
+
">
-
+
';
?>
diff --git a/classes/SpecProcessorOcr.php b/classes/SpecProcessorOcr.php
index 975f3e774b..633160aeab 100644
--- a/classes/SpecProcessorOcr.php
+++ b/classes/SpecProcessorOcr.php
@@ -3,10 +3,10 @@
* Used by automatic nightly process and by the occurrence editor (/collections/editor/occurrenceeditor.php)
*/
include_once($SERVER_ROOT.'/config/dbconnection.php');
+include_once($SERVER_ROOT.'/classes/Manager.php');
-class SpecProcessorOcr{
+class SpecProcessorOcr extends Manager{
- private $conn;
private $tempPath;
private $imgUrlLocal;
private $deleteAllOcrFiles = 0;
@@ -20,19 +20,13 @@ class SpecProcessorOcr{
private $specKeyPattern;
private $ocrSource;
- //If silent is set, script will produce no non-fatal output.
- private $verbose = 0; //0 = silent, 1 = logFile, 2 = echo, 3 = both
- private $logFH;
- private $errorStr;
-
function __construct() {
+ parent::__construct(null,'write');
$this->setTempPath();
- $this->conn = MySQLiConnectionFactory::getCon("write");
}
function __destruct(){
- if($this->logFH) fclose($this->logFH);
- if(!($this->conn === false)) $this->conn->close();
+ parent::__destruct();
//unlink($this->imgUrlLocal);
}
@@ -78,33 +72,33 @@ private function ocrImageByUrl($imgUrl,$getBest = 0,$sciName=''){
}
else{
$err = 'ERROR: Unable to load image, URL: '.$imgUrl;
- $this->logMsg($err,1);
+ $this->logOrEcho($err,1);
$rawStr = 'ERROR';
}
}
else{
$err = 'ERROR: Empty URL';
- $this->logMsg($err,1);
+ $this->logOrEcho($err,1);
$rawStr = 'ERROR';
}
return $rawStr;
}
private function ocrImage($url = ""){
- global $tesseractPath;
+ global $TESSERACT_PATH;
$retStr = '';
if(!$url) $url = $this->imgUrlLocal;
if($url){
//OCR image, result text is output to $outputFile
$output = array();
$outputFile = substr($url,0,strlen($url)-4);
- if(isset($tesseractPath) && $tesseractPath){
- if(substr($tesseractPath,0,2) == 'C:'){
+ if(isset($TESSERACT_PATH) && $TESSERACT_PATH){
+ if(substr($TESSERACT_PATH,0,2) == 'C:'){
//Full path to tesseract with quotes needed for Windows
- exec('"'.$tesseractPath.'" '.$url.' '.$outputFile,$output);
+ exec('"'.$TESSERACT_PATH.'" '.$url.' '.$outputFile,$output);
}
else{
- exec($tesseractPath.' '.$url.' '.$outputFile,$output);
+ exec($TESSERACT_PATH.' '.$url.' '.$outputFile,$output);
}
}
else{
@@ -124,7 +118,7 @@ private function ocrImage($url = ""){
unlink($outputFile.'.txt');
}
else{
- $this->logMsg("ERROR: Unable to locate output file",1);
+ $this->logOrEcho("ERROR: Unable to locate output file",1);
}
}
return $retStr;//$this->cleanRawStr($retStr);
@@ -144,8 +138,8 @@ private function databaseRawStr($imgId,$rawStr,$notes,$source){
return true;
}
else{
- $this->logMsg("ERROR: Unable to load fragment into database: ".$this->conn->error,1);
- $this->logMsg("SQL: ".$sql,2);
+ $this->logOrEcho("ERROR: Unable to load fragment into database: ".$this->conn->error,1);
+ $this->logOrEcho("SQL: ".$sql,2);
return false;
}
}
@@ -198,7 +192,7 @@ public function batchOcrUnprocessed($inCollStr,$procStatus = 'unprocessed',$limi
//Batch OCR
foreach($collArr as $collid => $instCode){
- $this->logMsg('Starting batch processing for '.$instCode);
+ $this->logOrEcho('Starting batch processing for '.$instCode);
$sql = 'SELECT i.imgid, IFNULL(i.originalurl, i.url) AS url, o.sciName, i.occid '.
'FROM omoccurrences o INNER JOIN images i ON o.occid = i.occid '.
'LEFT JOIN specprocessorrawlabels r ON i.imgid = r.imgid '.
@@ -210,7 +204,7 @@ public function batchOcrUnprocessed($inCollStr,$procStatus = 'unprocessed',$limi
while($r = $rs->fetch_object()){
$rawStr = $this->ocrImageByUrl($r->url,$getBest,$r->sciName);
if($rawStr != 'ERROR'){
- $this->logMsg('#'.$recCnt.': image '.$r->imgid.' processed ('.date("Y-m-d H:i:s").')');
+ $this->logOrEcho('#'.$recCnt.': image '.$r->imgid.' processed ('.date("Y-m-d H:i:s").')');
$notes = '';
$source = 'Tesseract: '.date('Y-m-d');
$this->databaseRawStr($r->imgid,$rawStr,$notes,$source);
@@ -233,8 +227,8 @@ public function harvestOcrText($postArr){
$this->ocrSource = $postArr['ocrsource'];
$this->specKeyPattern = $postArr['speckeypattern'];
if(!$this->specKeyPattern){
- $this->errorStr = 'ERROR loading OCR files: Specimen catalog number pattern missing';
- $this->logMsg($this->errorStr);
+ $this->errorMessage = 'ERROR loading OCR files: Specimen catalog number pattern missing';
+ $this->logOrEcho($this->errorMessage);
return false;
}
$sourcePath = '';
@@ -246,38 +240,38 @@ public function harvestOcrText($postArr){
$sourcePath = $this->uploadOcrFile();
}
if(!$sourcePath){
- $this->errorStr = 'ERROR loading OCR files: OCR source path is missing';
- $this->logMsg($this->errorStr);
+ $this->errorMessage = 'ERROR loading OCR files: OCR source path is missing';
+ $this->logOrEcho($this->errorMessage);
return false;
}
if(substr($sourcePath,0,4) == 'http'){
//http protocol, thus test for a valid page
$headerArr = get_headers($sourcePath);
if(!$headerArr){
- $this->errorStr = 'ERROR loading OCR files: sourcePath returned bad headers ('.$sourcePath.')';
- $this->logMsg($this->errorStr);
+ $this->errorMessage = 'ERROR loading OCR files: sourcePath returned bad headers ('.$sourcePath.')';
+ $this->logOrEcho($this->errorMessage);
return false;
}
preg_match('/http.+\s{1}(\d{3})\s{1}/i',$headerArr[0],$codeArr);
if($codeArr[1] == '403'){
- $this->errorStr = 'ERROR loading OCR files: sourcePath returned Forbidden ('.$sourcePath.')';
- $this->logMsg($this->errorStr);
+ $this->errorMessage = 'ERROR loading OCR files: sourcePath returned Forbidden ('.$sourcePath.')';
+ $this->logOrEcho($this->errorMessage);
return false;
}
if($codeArr[1] == '404'){
- $this->errorStr = 'ERROR loading OCR files: sourcePath returned a page Not Found error ('.$sourcePath.')';
- $this->logMsg($this->errorStr);
+ $this->errorMessage = 'ERROR loading OCR files: sourcePath returned a page Not Found error ('.$sourcePath.')';
+ $this->logOrEcho($this->errorMessage);
return false;
}
if($codeArr[1] != '200'){
- $this->errorStr = 'ERROR loading OCR files: sourcePath returned error code '.$codeArr[1].' ('.$sourcePath.')';
- $this->logMsg($this->errorStr);
+ $this->errorMessage = 'ERROR loading OCR files: sourcePath returned error code '.$codeArr[1].' ('.$sourcePath.')';
+ $this->logOrEcho($this->errorMessage);
return false;
}
}
elseif(!file_exists($sourcePath)){
- $this->errorStr = 'ERROR loading OCR files: sourcePath does not exist ('.$sourcePath.')';
- $this->logMsg($this->errorStr);
+ $this->errorMessage = 'ERROR loading OCR files: sourcePath does not exist ('.$sourcePath.')';
+ $this->logOrEcho($this->errorMessage);
return false;
}
//Initiate processing
@@ -288,7 +282,7 @@ public function harvestOcrText($postArr){
else{
$this->processOcrFolder($sourcePath);
}
- $this->logMsg('Done loading OCR files ');
+ $this->logOrEcho('Done loading OCR files ');
return $status;
@@ -297,13 +291,13 @@ public function harvestOcrText($postArr){
private function uploadOcrFile(){
$retPath = '';
if(!array_key_exists('ocrfile',$_FILES)){
- $this->errorStr = 'ERROR loading OCR file: OCR file missing';
- $this->logMsg($this->errorStr);
+ $this->errorMessage = 'ERROR loading OCR file: OCR file missing';
+ $this->logOrEcho($this->errorMessage);
return ;
}
if(!$this->tempPath){
- $this->errorStr = 'ERROR loading OCR file: temp target path empty';
- $this->logMsg($this->errorStr);
+ $this->errorMessage = 'ERROR loading OCR file: temp target path empty';
+ $this->logOrEcho($this->errorMessage);
return ;
}
$zipPath = $this->tempPath.'ocrupload.zip';
@@ -321,14 +315,14 @@ private function uploadOcrFile(){
unlink($zipPath);
}
else{
- $this->errorStr = 'ERROR unpacking OCR file: '.$res;
- $this->logMsg($this->errorStr);
+ $this->errorMessage = 'ERROR unpacking OCR file: '.$res;
+ $this->logOrEcho($this->errorMessage);
return ;
}
}
else{
- $this->errorStr = 'ERROR loading OCR file: input file lacks zip extension';
- $this->logMsg($this->errorStr);
+ $this->errorMessage = 'ERROR loading OCR file: input file lacks zip extension';
+ $this->logOrEcho($this->errorMessage);
return ;
}
return $retPath;
@@ -344,16 +338,16 @@ private function processOcrHtml($sourcePath){
if(!in_array($fileName,$skipAnchors)){
$fileExt = strtolower(substr($fileName,strrpos($fileName,'.')+1));
if($fileExt){
- $this->logMsg("Processing OCR File: ".$fileName);
+ $this->logOrEcho("Processing OCR File: ".$fileName);
if($fileExt == "txt"){
$this->processOcrFile($fileName,$sourcePath);
}
else{
- $this->logMsg("ERROR: File skipped, not a supported OCR file with .txt extension: ".$sourcePath.$fileName);
+ $this->logOrEcho("ERROR: File skipped, not a supported OCR file with .txt extension: ".$sourcePath.$fileName);
}
}
elseif(stripos($fileName,'Parent Dir') === false){
- $this->logMsg('New dir path: '.$sourcePath.$fileName);
+ $this->logOrEcho('New dir path: '.$sourcePath.$fileName);
$this->processOcrHtml($sourcePath.$fileName.'/');
}
}
@@ -367,13 +361,13 @@ private function processOcrFolder($sourcePath){
while($fileName = readdir($dirFH)){
if($fileName != "." && $fileName != ".." && $fileName != ".svn"){
if(is_file($sourcePath.$fileName)){
- $this->logMsg("Processing OCR File: ".$fileName);
+ $this->logOrEcho("Processing OCR File: ".$fileName);
$fileExt = strtolower(substr($fileName,strrpos($fileName,'.')));
if($fileExt == ".txt"){
$this->processOcrFile($fileName,$sourcePath);
}
else{
- $this->logMsg("ERROR: File skipped, not a supported OCR text file (.txt): ".$fileName);
+ $this->logOrEcho("ERROR: File skipped, not a supported OCR text file (.txt): ".$fileName);
}
}
elseif(is_dir($sourcePath.$fileName)){
@@ -384,14 +378,14 @@ private function processOcrFolder($sourcePath){
if($dirFH) closedir($dirFH);
}
else{
- $this->logMsg("ERROR: unable to access source directory: ".$sourcePath,1);
+ $this->logOrEcho("ERROR: unable to access source directory: ".$sourcePath,1);
}
if($this->deleteAllOcrFiles) unlink($sourcePath);
}
private function processOcrFile($fileName,$sourcePath){
$ocrCnt = 0;
- //$this->logMsg('Starting OCR text processing... ',1);
+ //$this->logOrEcho('Starting OCR text processing... ',1);
if($rawTextFH = fopen($sourcePath.$fileName, 'r')){
$rawStr = fread($rawTextFH, filesize($sourcePath.$fileName));
fclose($rawTextFH);
@@ -451,15 +445,15 @@ private function processOcrFile($fileName,$sourcePath){
}
}
else{
- $this->logMsg('ERROR: unable locate specimen image (catalog #: '.$catNumber.')',1);
+ $this->logOrEcho('ERROR: unable locate specimen image (catalog #: '.$catNumber.')',1);
}
}
else{
- $this->logMsg('ERROR: unable to extract catalog number ('.$fileName.' using '.$this->specKeyPattern.')',1);
+ $this->logOrEcho('ERROR: unable to extract catalog number ('.$fileName.' using '.$this->specKeyPattern.')',1);
}
}
else{
- $this->logMsg('ERROR: unable to read rawOcr file: '.$fileName,1);
+ $this->logOrEcho('ERROR: unable to read rawOcr file: '.$fileName,1);
}
}
@@ -600,7 +594,7 @@ private function getBestOCR($sciName = ''){
$score_treated = $this->scoreOCR($rawStr_treated, $sciName);
unlink($urlTemp);
if($score_treated > $score_base) {
- $this->logMsg('Best Score applied',1);
+ $this->logOrEcho('Best Score applied',1);
return $rawStr_treated;
} else {
return $rawStr_base;
@@ -770,20 +764,6 @@ public function setCropH($h){
$this->cropH = $h;
}
- public function getErrorStr(){
- return $this->errorStr;
- }
-
- public function setVerbose($s){
- $this->verbose = $s;
- if($this->verbose == 1 || $this->verbose == 3){
- if($this->tempPath){
- $logPath = $this->tempPath.'log_'.date('Ymd').'.log';
- $this->logFH = fopen($logPath, 'a');
- }
- }
- }
-
private function setTempPath(){
$tempPath = 0;
if(array_key_exists('tempDirRoot',$GLOBALS)){
@@ -810,19 +790,6 @@ private function setTempPath(){
}*/
//Misc functions
- private function logMsg($msg,$indent = 0) {
- if($this->verbose == 1 || $this->verbose == 3){
- if($this->logFH){
- $msg .= "\n";
- if($indent) $msg = "\t".$msg;
- fwrite($this->logFH, $msg);
- }
- }
- elseif($this->verbose > 1 ){
- echo '
'.$msg.'
';
- }
- }
-
private function cleanRawStr($inStr){
$retStr = $this->encodeString($inStr);
@@ -858,56 +825,5 @@ private function cleanRawStr($inStr){
);
return $retStr;
}
-
- private function encodeString($inStr){
- global $CHARSET;
- $retStr = $inStr;
- //Get rid of Windows curly (smart) quotes
- $search = array(chr(145),chr(146),chr(147),chr(148),chr(149),chr(150),chr(151));
- $replace = array("'","'",'"','"','*','-','-');
- $inStr = str_replace($search, $replace, $inStr);
- //Get rid of UTF-8 curly smart quotes and dashes
- $badwordchars=array("\xe2\x80\x98", // left single quote
- "\xe2\x80\x99", // right single quote
- "\xe2\x80\x9c", // left double quote
- "\xe2\x80\x9d", // right double quote
- "\xe2\x80\x94", // em dash
- "\xe2\x80\xa6" // elipses
- );
- $fixedwordchars=array("'", "'", '"', '"', '-', '...');
- $inStr = str_replace($badwordchars, $fixedwordchars, $inStr);
-
- if($inStr){
- if(strtolower($CHARSET) == "utf-8" || strtolower($CHARSET) == "utf8"){
- if(mb_detect_encoding($inStr,'UTF-8,ISO-8859-1',true) == "ISO-8859-1"){
- $retStr = utf8_encode($inStr);
- //$retStr = iconv("ISO-8859-1//TRANSLIT","UTF-8",$inStr);
- }
- }
- elseif(strtolower($CHARSET) == "iso-8859-1"){
- if(mb_detect_encoding($inStr,'UTF-8,ISO-8859-1') == "UTF-8"){
- $retStr = utf8_decode($inStr);
- //$retStr = iconv("UTF-8","ISO-8859-1//TRANSLIT",$inStr);
- }
- }
- //$line = iconv('macintosh', 'UTF-8', $line);
- //mb_detect_encoding($buffer, 'windows-1251, macroman, UTF-8');
- }
- return $retStr;
- }
-
- private function cleanOutStr($str){
- $newStr = str_replace('"',""",$str);
- $newStr = str_replace("'","'",$newStr);
- //$newStr = $this->conn->real_escape_string($newStr);
- return $newStr;
- }
-
- private function cleanInStr($str){
- $newStr = trim($str);
- $newStr = preg_replace('/\s\s+/', ' ',$newStr);
- $newStr = $this->conn->real_escape_string($newStr);
- return $newStr;
- }
}
?>
\ No newline at end of file
diff --git a/classes/SpecUploadBase.php b/classes/SpecUploadBase.php
index de360a7217..72c78f5478 100644
--- a/classes/SpecUploadBase.php
+++ b/classes/SpecUploadBase.php
@@ -145,6 +145,25 @@ public function loadFieldMap($autoBuildFieldMap = false){
//Add additional fields that are used for mapping to other fields just before record is imported into uploadspectemp
$this->symbFields[] = 'coordinateuncertaintyradius';
$this->symbFields[] = 'coordinateuncertaintyunits';
+ //Add DwC GeologicalContext (paleo) terms
+ $this->symbFields[] = 'geologicalcontextid';
+ $this->symbFields[] = 'earliestEonOrLowestEonothem';
+ $this->symbFields[] = 'latestEonOrHighestEonothem';
+ $this->symbFields[] = 'earliestEraOrLowestErathem';
+ $this->symbFields[] = 'latestEraOrHighestErathem';
+ $this->symbFields[] = 'earliestPeriodOrLowestSystem';
+ $this->symbFields[] = 'latestPeriodOrHighestSystem';
+ $this->symbFields[] = 'earliestEpochOrLowestSeries';
+ $this->symbFields[] = 'latestEpochOrHighestSeries';
+ $this->symbFields[] = 'earliestAgeOrLowestStage';
+ $this->symbFields[] = 'latestAgeOrHighestStage';
+ $this->symbFields[] = 'lowestBiostratigraphicZone';
+ $this->symbFields[] = 'highestBiostratigraphicZone';
+ $this->symbFields[] = 'lithostratigraphicTermsProperty';
+ $this->symbFields[] = 'group';
+ $this->symbFields[] = 'formation';
+ $this->symbFields[] = 'member';
+ $this->symbFields[] = 'bed';
switch ($this->uploadType) {
case $this->FILEUPLOAD:
@@ -728,7 +747,7 @@ private function recordCleaningStage2(){
if($this->collMetadataArr["managementtype"] == 'Snapshot' || $this->uploadType == $this->SKELETAL){
//Match records that were processed via the portal, walked back to collection's central database, and come back to portal
$this->outputMsg('
Populating source identifiers (dbpk) to relink specimens processed within portal...
');
- $sql = 'UPDATE uploadspectemp u INNER JOIN omoccurrences o ON (u.catalogNumber = o.catalogNumber) AND (u.collid = o.collid) '.
+ $sql = 'UPDATE IGNORE uploadspectemp u INNER JOIN omoccurrences o ON (u.catalogNumber = o.catalogNumber) AND (u.collid = o.collid) '.
'SET u.occid = o.occid, o.dbpk = u.dbpk '.
'WHERE (u.collid IN('.$this->collId.')) AND (u.occid IS NULL) AND (u.catalogNumber IS NOT NULL) AND (o.catalogNumber IS NOT NULL) AND (o.dbpk IS NULL) ';
$this->conn->query($sql);
@@ -970,7 +989,7 @@ private function prepareImages(){
$sql = 'DELETE FROM uploadimagetemp '.
'WHERE (originalurl LIKE "%.dng" OR originalurl LIKE "%.tif") AND (collid = '.$this->collId.')';
if($this->conn->query($sql)){
- $this->outputMsg('
step 1 of 3...
');
+ $this->outputMsg('
step 1 of 5...
');
}
else{
$this->outputMsg('
WARNING removing non-jpgs from uploadimagetemp: '.$this->conn->error.'
');
@@ -981,7 +1000,7 @@ private function prepareImages(){
'SET ui.occid = u.occid '.
'WHERE (ui.occid IS NULL) AND (u.occid IS NOT NULL) AND (ui.collid = '.$this->collId.')';
if($this->conn->query($sql)){
- $this->outputMsg('
step 2 of 3...
');
+ $this->outputMsg('
step 2 of 5...
');
}
else{
$this->outputMsg('
WARNING updating occids within uploadimagetemp: '.$this->conn->error.'
');
@@ -989,12 +1008,22 @@ private function prepareImages(){
//Remove previously loaded images where urls match exactly
$sql = 'DELETE u.* FROM uploadimagetemp u INNER JOIN images i ON u.occid = i.occid '.
- 'WHERE (u.collid = '.$this->collId.') AND (u.originalurl = i.originalurl)';
+ 'WHERE (u.collid = '.$this->collId.') AND (u.originalurl = i.originalurl) AND (u.url = i.url)';
if($this->conn->query($sql)){
- $this->outputMsg('
step 3 of 3...
');
+ $this->outputMsg('
step 3 of 5...
');
}
else{
- $this->outputMsg('
ERROR deleting uploadimagetemp records with matching originalurls: '.$this->conn->error.'
');
+ $this->outputMsg('
ERROR deleting uploadimagetemp records with matching urls: '.$this->conn->error.'
');
+ }
+ if($this->collMetadataArr["managementtype"] == 'Snapshot'){
+ //Flush non-matching image derivatives (e.g. thumbnails)
+ $sql = 'DELETE i.* FROM uploadimagetemp u INNER JOIN images i ON u.occid = i.occid WHERE (u.collid = '.$this->collId.') AND (u.originalurl = i.originalurl)';
+ if($this->conn->query($sql)){
+ $this->outputMsg('
step 4 of 5...
');
+ }
+ else{
+ $this->outputMsg('
ERROR deleting image records with matching originalurls: '.$this->conn->error.'