Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add bashlib processing worker, require Python 3.7 #1024

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 29 additions & 35 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,76 +16,71 @@ jobs:
steps:
- checkout
- run: HOMEBREW_NO_AUTO_UPDATE=1 brew install imagemagick geos
- run: make install
- run: make deps-test test benchmark

test-python36:
docker:
- image: python:3.6.15
working_directory: ~/ocrd-core
steps:
- checkout
- run: apt-get -y update
- run: pip install -U pip
- run: make deps-ubuntu install
- run: make deps-test test benchmark
- run: make install deps-test
- run: make test benchmark

test-python37:
docker:
- image: python:3.7.16
- image: cimg/python:3.7
working_directory: ~/ocrd-core
steps:
- checkout
- run: apt-get -y update
- run: make deps-ubuntu install
- run: make deps-test test benchmark
- run: sudo apt-get -y update
- run: sudo make deps-ubuntu
- run: make install deps-test
- run: make test benchmark

test-python38:
docker:
- image: python:3.8.16
- image: cimg/python:3.8
working_directory: ~/ocrd-core
steps:
- checkout
- run: apt-get -y update
- run: make deps-ubuntu install
- run: make deps-test test benchmark
- run: sudo apt-get -y update
- run: sudo make deps-ubuntu
- run: make install deps-test
- run: make test benchmark

test-python39:
docker:
- image: python:3.9.16
- image: cimg/python:3.9
working_directory: ~/ocrd-core
steps:
- checkout
- run: apt-get -y update
- run: make deps-ubuntu install
- run: make deps-test test benchmark
- run: sudo apt-get -y update
- run: sudo make deps-ubuntu
- run: make install deps-test
- run: make test benchmark

test-python310:
docker:
- image: python:3.10.10
- image: cimg/python:3.10
working_directory: ~/ocrd-core
steps:
- checkout
- run: apt-get -y update
- run: make deps-ubuntu install
- run: make deps-test test benchmark
- run: sudo apt-get -y update
- run: sudo make deps-ubuntu
- run: make install deps-test
- run: make test benchmark

test-python311:
docker:
- image: python:3.11.2
- image: cimg/python:3.11
working_directory: ~/ocrd-core
steps:
- checkout
- run: apt-get -y update
- run: make deps-ubuntu install
- run: make deps-test test benchmark
- run: sudo apt-get -y update
- run: sudo make deps-ubuntu
- run: make install deps-test
- run: make test benchmark

deploy:
docker:
- image: circleci/buildpack-deps:stretch
steps:
- checkout
- setup_remote_docker # https://circleci.com/docs/2.0/building-docker-images/
- setup_remote_docker: # https://circleci.com/docs/2.0/building-docker-images/
docker_layer_caching: true
- run: make docker
- run: make docker-cuda
- run:
Expand All @@ -104,7 +99,6 @@ workflows:
only: master
test-pull-request:
jobs:
- test-python36
- test-python37
- test-python38
- test-python39
Expand Down
38 changes: 19 additions & 19 deletions ocrd/ocrd/decorators/ocrd_cli_options.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from click import option
from click import option, Path
from .parameter_option import parameter_option, parameter_override_option
from .loglevel_option import loglevel_option
from ocrd_network import QueueServerParamType, DatabaseParamType
Expand All @@ -19,28 +19,28 @@ def cli(mets_url):
"""
# XXX Note that the `--help` output is statically generated by generate_processor_help
params = [
option('-m', '--mets', help="METS to process", default="mets.xml"),
option('-w', '--working-dir', help="Working Directory"),
option('-m', '--mets', default="mets.xml"),
option('-w', '--working-dir'),
# TODO OCR-D/core#274
# option('-I', '--input-file-grp', help='File group(s) used as input. **required**'),
# option('-O', '--output-file-grp', help='File group(s) used as output. **required**'),
option('-I', '--input-file-grp', help='File group(s) used as input.', default='INPUT'),
option('-O', '--output-file-grp', help='File group(s) used as output.', default='OUTPUT'),
option('-g', '--page-id', help="ID(s) of the pages to process"),
option('--overwrite', help="Overwrite the output file group or a page range (--page-id)", is_flag=True, default=False),
option('--queue', help="The URL of the Queue Server, format: username:password@host:port/vhost", type=QueueServerParamType()),
option('--database', help="The URL of the MongoDB, format: mongodb://host:port", type=DatabaseParamType()),
option('-C', '--show-resource', help='Dump the content of processor resource RESNAME', metavar='RESNAME'),
option('-L', '--list-resources', is_flag=True, default=False, help='List names of processor resources'),
# option('-I', '--input-file-grp', required=True),
# option('-O', '--output-file-grp', required=True),
option('-I', '--input-file-grp', default='INPUT'),
option('-O', '--output-file-grp', default='OUTPUT'),
option('-g', '--page-id'),
option('--overwrite', is_flag=True, default=False),
option('--profile', is_flag=True, default=False),
option('--profile-file', type=Path(dir_okay=False, writable=True)),
kba marked this conversation as resolved.
Show resolved Hide resolved
parameter_option,
parameter_override_option,
option('-J', '--dump-json', help="Dump tool description as JSON and exit", is_flag=True, default=False),
option('-D', '--dump-module-dir', help="Print processor's 'moduledir' of resourcess", is_flag=True, default=False),
loglevel_option,
option('-V', '--version', help="Show version", is_flag=True, default=False),
option('-h', '--help', help="This help message", is_flag=True, default=False),
option('--profile', help="Enable profiling", is_flag=True, default=False),
option('--profile-file', help="Write cProfile stats to this file. Implies --profile"),
option('--queue', type=QueueServerParamType()),
option('--database', type=DatabaseParamType()),
option('-C', '--show-resource'),
option('-L', '--list-resources', is_flag=True, default=False),
option('-J', '--dump-json', is_flag=True, default=False),
option('-D', '--dump-module-dir', is_flag=True, default=False),
option('-h', '--help', is_flag=True, default=False),
option('-V', '--version', is_flag=True, default=False),
]
for param in params:
param(f)
Expand Down
25 changes: 18 additions & 7 deletions ocrd/ocrd/lib.bash
Original file line number Diff line number Diff line change
Expand Up @@ -143,34 +143,45 @@ ocrd__parse_argv () {
--profile) ocrd__argv[profile]=true ;;
--profile-file) ocrd__argv[profile_file]=$(realpath "$2") ; shift ;;
-V|--version) ocrd ocrd-tool "$OCRD_TOOL_JSON" version; exit ;;
--queue) ocrd__worker_queue="$2" ; shift ;;
--database) ocrd__worker_database="$2" ; shift ;;
*) ocrd__raise "Unknown option '$1'" ;;
esac
shift
done

if [[ ! -e "${ocrd__argv[mets_file]}" ]];then
if [ -v ocrd__worker_queue -a -v ocrd__worker_database ]; then
ocrd processing-worker $OCRD_TOOL_NAME --queue "${ocrd__worker_queue}" --database "${ocrd__worker_database}"
exit
elif [ -v ocrd__worker_queue ]; then
ocrd__raise "Processing Worker also requires a --database argument"
elif [ -v ocrd__worker_database ]; then
ocrd__raise "Processing Worker also requires a --queue argument"
fi

if [[ ! -e "${ocrd__argv[mets_file]}" ]]; then
ocrd__raise "METS file '${ocrd__argv[mets_file]}' not found"
fi

if [[ ! -d "${ocrd__argv[working_dir]:=$(dirname "${ocrd__argv[mets_file]}")}" ]];then
if [[ ! -d "${ocrd__argv[working_dir]:=$(dirname "${ocrd__argv[mets_file]}")}" ]]; then
ocrd__raise "workdir '${ocrd__argv[working_dir]}' not a directory. Use -w/--working-dir to set correctly"
fi

if [[ ! "${ocrd__argv[log_level]:=INFO}" =~ OFF|ERROR|WARN|INFO|DEBUG|TRACE ]];then
if [[ ! "${ocrd__argv[log_level]:=INFO}" =~ OFF|ERROR|WARN|INFO|DEBUG|TRACE ]]; then
ocrd__raise "log level '${ocrd__argv[log_level]}' is invalid"
fi

if [[ -z "${ocrd__argv[input_file_grp]:=}" ]];then
if [[ -z "${ocrd__argv[input_file_grp]:=}" ]]; then
ocrd__raise "Provide --input-file-grp/-I explicitly!"
fi

if [[ -z "${ocrd__argv[output_file_grp]:=}" ]];then
if [[ -z "${ocrd__argv[output_file_grp]:=}" ]]; then
ocrd__raise "Provide --output-file-grp/-O explicitly!"
fi

# enable profiling (to be extended/acted upon by caller)
if [[ ${ocrd__argv[profile]} = true ]];then
if [[ -n "${ocrd__argv[profile_file]}" ]];then
if [[ ${ocrd__argv[profile]} = true ]]; then
if [[ -n "${ocrd__argv[profile_file]}" ]]; then
exec 3> "${ocrd__argv[profile_file]}"
else
exec 3>&2
Expand Down
44 changes: 24 additions & 20 deletions ocrd/ocrd/processor/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,11 +240,11 @@ def wrap(s):
if processor_instance:
module = inspect.getmodule(processor_instance)
if module and module.__doc__:
doc_help += '\n' + inspect.cleandoc(module.__doc__)
doc_help += '\n' + inspect.cleandoc(module.__doc__) + '\n'
if processor_instance.__doc__:
doc_help += '\n' + inspect.cleandoc(processor_instance.__doc__)
doc_help += '\n' + inspect.cleandoc(processor_instance.__doc__) + '\n'
if processor_instance.process.__doc__:
doc_help += '\n' + inspect.cleandoc(processor_instance.process.__doc__)
doc_help += '\n' + inspect.cleandoc(processor_instance.process.__doc__) + '\n'
if doc_help:
doc_help = '\n\n' + wrap_text(doc_help, width=72,
initial_indent=' > ',
Expand All @@ -255,43 +255,47 @@ def wrap(s):

%s%s

Options:
Options for processing:
-m, --mets URL-PATH URL or file path of METS to process [./mets.xml]
-w, --working-dir PATH Working directory of local workspace [dirname(URL-PATH)]
-I, --input-file-grp USE File group(s) used as input
-O, --output-file-grp USE File group(s) used as output
-g, --page-id ID Physical page ID(s) to process
-g, --page-id ID Physical page ID(s) to process instead of full document []
--overwrite Remove existing output pages/images
(with --page-id, remove only those)
--queue The RabbitMQ server address in format: {host}:{port}/{vhost}"
--database The MongoDB address in format: mongodb://{host}:{port}"
(with "--page-id", remove only those)
--profile Enable profiling
--profile-file Write cProfile stats to this file. Implies --profile
--profile-file PROF-PATH Write cProfile stats to PROF-PATH. Implies "--profile"
-p, --parameter JSON-PATH Parameters, either verbatim JSON string
or JSON file path
-P, --param-override KEY VAL Override a single JSON object key-value pair,
taking precedence over --parameter
-m, --mets URL-PATH URL or file path of METS to process
-w, --working-dir PATH Working directory of local workspace
taking precedence over "--parameter"
-l, --log-level [OFF|ERROR|WARN|INFO|DEBUG|TRACE]
Log level
Override log level globally [INFO]

Options for Processing Worker server:
--queue The RabbitMQ server address in format
"amqp://{user}:{pass}@{host}:{port}/{vhost}"
[amqp://admin:admin@localhost:5672]
--database The MongoDB server address in format
"mongodb://{host}:{port}"
[mongodb://localhost:27018]
kba marked this conversation as resolved.
Show resolved Hide resolved

Options for information:
-C, --show-resource RESNAME Dump the content of processor resource RESNAME
-L, --list-resources List names of processor resources
-J, --dump-json Dump tool description as JSON and exit
-D, --dump-module-dir Output the 'module' directory with resources for this processor
-h, --help This help message
-J, --dump-json Dump tool description as JSON
-D, --dump-module-dir Show the 'module' resource location path for this processor
-h, --help Show this message
-V, --version Show version

Parameters:
%s
Default Wiring:
%s -> %s
kba marked this conversation as resolved.
Show resolved Hide resolved

''' % (
ocrd_tool['executable'],
ocrd_tool['description'],
doc_help,
parameter_help,
ocrd_tool.get('input_file_grp', 'NONE'),
ocrd_tool.get('output_file_grp', 'NONE')
)


Expand Down
1 change: 1 addition & 0 deletions ocrd/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
license='Apache License 2.0',
packages=find_packages(exclude=('tests', 'docs')),
include_package_data=True,
python_requires=">=3.7",
install_requires=install_requires,
entry_points={
'console_scripts': [
Expand Down
1 change: 1 addition & 0 deletions ocrd_modelfactory/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
author_email='unixprog@gmail.com',
url='https://github.com/OCR-D/core',
license='Apache License 2.0',
python_requires=">=3.7",
install_requires=install_requires,
packages=['ocrd_modelfactory'],
package_data={'': ['*.json', '*.yml', '*.xml']},
Expand Down
1 change: 1 addition & 0 deletions ocrd_models/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
author_email='unixprog@gmail.com',
url='https://github.com/OCR-D/core',
license='Apache License 2.0',
python_requires=">=3.7",
install_requires=install_requires,
packages=['ocrd_models'],
package_data={'': ['*.json', '*.yml', '*.xml']},
Expand Down
7 changes: 1 addition & 6 deletions ocrd_network/ocrd_network/deployer.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
CustomDockerClient,
DeployType,
HostData,
is_bashlib_processor,
)
from .rabbitmq_utils import RMQPublisher

Expand Down Expand Up @@ -281,11 +280,7 @@ def start_native_processor(self, client: SSHClient, processor_name: str, queue_u
self.log.info(f'Starting native processor: {processor_name}')
channel = client.invoke_shell()
stdin, stdout = channel.makefile('wb'), channel.makefile('rb')
if is_bashlib_processor(processor_name):
cmd = f'ocrd processing-worker {processor_name} --database {database_url} ' \
f'--queue {queue_url}'
else:
cmd = f'{processor_name} --database {database_url} --queue {queue_url}'
cmd = f'{processor_name} --database {database_url} --queue {queue_url}'
bertsky marked this conversation as resolved.
Show resolved Hide resolved
# the only way (I could find) to make it work to start a process in the background and
# return early is this construction. The pid of the last started background process is
# printed with `echo $!` but it is printed in between other output. Because of that I added
Expand Down
16 changes: 0 additions & 16 deletions ocrd_network/ocrd_network/deployment_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,22 +38,6 @@ def create_docker_client(address: str, username: str, password: Union[str, None]
return CustomDockerClient(username, address, password=password, keypath=keypath)


def is_bashlib_processor(processor_name):
    """Determine whether a processor is a bashlib processor.

    Args:
        processor_name: name of the processor executable, looked up with
            ``which``.

    Returns:
        True if ``processor_name`` starts with "ocrd", is available as a
        program and its first line is not a Python hashbang; False otherwise.
    """
    if not processor_name.startswith("ocrd"):
        return False
    program = which(processor_name)
    if not program:
        return False
    # Read raw bytes and decode leniently: the program may be a compiled
    # binary, and strict text decoding of its first "line" would raise
    # UnicodeDecodeError instead of yielding an answer.
    with open(program, 'rb') as fin:
        first_line = fin.readline().decode('utf-8', errors='replace').strip()
    # Accept direct interpreter paths ("#!/usr/bin/python3"), env-style
    # hashbangs ("#!/usr/bin/env python3") and trailing interpreter arguments
    # ("#!/usr/bin/python3 -u").
    python_hashbang = re.fullmatch(
        r'#!.*[/ \t]python[0-9.]*(?:[ \t].*)?', first_line)
    return python_hashbang is None


class HostData:
"""class to store runtime information for a host
Expand Down
1 change: 1 addition & 0 deletions ocrd_network/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
author_email='unixprog@gmail.com',
url='https://github.com/OCR-D/core',
license='Apache License 2.0',
python_requires=">=3.7",
install_requires=install_requires,
packages=[
'ocrd_network',
Expand Down
1 change: 1 addition & 0 deletions ocrd_utils/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
url='https://github.com/OCR-D/core',
license='Apache License 2.0',
packages=['ocrd_utils'],
python_requires=">=3.7",
install_requires=install_requires,
package_data={'': ['*.json', '*.yml', '*.xml']},
keywords=['OCR', 'OCR-D']
Expand Down
1 change: 1 addition & 0 deletions ocrd_validators/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
author_email='unixprog@gmail.com',
url='https://github.com/OCR-D/core',
license='Apache License 2.0',
python_requires=">=3.7",
install_requires=install_requires,
packages=['ocrd_validators'],
package_data={
Expand Down