Skip to content

Commit

Permalink
Add mostly auto-generated api documentation
Browse files Browse the repository at this point in the history
Change-Id: Ib007e4fbe69d3197a29f478a809fec4e3a541f4e
  • Loading branch information
kupietz committed Sep 7, 2023
1 parent 4128bee commit d1dc848
Show file tree
Hide file tree
Showing 5 changed files with 248 additions and 1 deletion.
15 changes: 15 additions & 0 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,21 @@
</execution>
</executions>
</plugin>
<!-- Javadoc generation for the API documentation. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-javadoc-plugin</artifactId>
<version>3.5.0</version>
<configuration>
<!-- Enable all doclint check groups except "missing" (missing comments/tags are not reported). -->
<doclint>all,-missing</doclint>
<!-- Register the custom apiNote block tag so that doc comments using it are accepted. -->
<tags>
<tag>
<name>apiNote</name>
<!-- Placement "a" allows the tag in all locations. -->
<placement>a</placement>
<!-- Heading shown above the tag text in the generated pages. -->
<head>API Note:</head>
</tag>
</tags>
</configuration>
</plugin>
</plugins>
</build>

Expand Down
122 changes: 121 additions & 1 deletion src/main/java/de/ids_mannheim/korap/tokenizer/KorapTokenizer.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,38 +6,102 @@
import java.io.PrintStream;
import java.io.Reader;

/**
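* Common interface for KorAP tokenizers. It combines OpenNLP's {@code Tokenizer}
* and {@code SentenceDetector} interfaces with setters for input, output and
* printing options.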
*
* @author kupietz
* @version $Id: $Id
*/
public interface KorapTokenizer extends opennlp.tools.tokenize.Tokenizer, opennlp.tools.sentdetect.SentenceDetector {
/**
* Scans the input.
* NOTE(review): presumably this processes the reader set via
* {@link #setInputReader(Reader)} and prints to the configured output stream;
* confirm against the implementations.
*
* @throws java.io.IOException if an I/O error occurs while scanning
*/
void scan() throws IOException;

/**
* Returns the language(s) this tokenizer is mainly targeted at.
*
* @return array of ISO 639 alpha-2 or alpha-3 language codes
* @apiNote will later be used to find appropriate implementations via reflection
*/
CharSequence[] getTargetLanguages();


/**
* Sets the input reader.
*
* @param inputReader the reader to use as input
*/
void setInputReader(Reader inputReader);

/**
* Switches sentence splitting on or off.
*
* @param splitSentences {@code true} to switch sentence splitting on,
* {@code false} to switch it off
*/
void setSplitSentences(boolean splitSentences);

/**
* Switches input echoing on or off.
*
* @param echo {@code true} to switch input echoing on,
* {@code false} to switch it off
*/
void setEcho(boolean echo);

/**
* Switches offset printing on or off.
*
* @param printOffsets {@code true} to switch offset printing on,
* {@code false} to switch it off
*/
void setPrintOffsets(boolean printOffsets);

/**
* Switches token printing on or off.
*
* @param tokenize {@code true} to switch token printing on,
* {@code false} to switch it off (despite its name, this
* parameter is the print-tokens flag)
*/
void setPrintTokens(boolean tokenize);

/**
* Sets the output stream.
*
* @param outputStream the print stream to use for output
*/
void setOutputStream(PrintStream outputStream);

/**
* Switches normalization on or off.
*
* @param normalize {@code true} to switch normalization on,
* {@code false} to switch it off
*/
void setNormalize(boolean normalize);

/**
* {@inheritDoc}
* <p>Re-declared from {@link opennlp.tools.tokenize.Tokenizer}, which this interface extends.
*/
String[] tokenize(String s);

/**
* {@inheritDoc}
* <p>Re-declared from {@link opennlp.tools.tokenize.Tokenizer}, which this interface extends.
*/
Span[] tokenizePos(String s);

/**
* Detects the sentences in the given text.
* NOTE(review): corresponds to sentence detection as declared by
* {@link opennlp.tools.sentdetect.SentenceDetector}, which this interface
* extends; confirm the exact contract against the OpenNLP version in use.
*
* @param s the text in which to detect sentences
* @return the detected sentences as strings
*/
String[] sentDetect(String s);

/**
* Detects the sentences in the given text and returns their positions.
* NOTE(review): corresponds to sentence position detection as declared by
* {@link opennlp.tools.sentdetect.SentenceDetector}; the spans are presumably
* character offsets into {@code s}, to be confirmed.
*
* @param s the text in which to detect sentences
* @return one span per detected sentence
*/
Span[] sentPosDetect(String s);

/**
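* Builder that collects tokenizer settings and creates a {@link KorapTokenizer}
* instance of the configured tokenizer class.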
*/
class Builder {
private boolean splitSentences;
private boolean echo;
Expand All @@ -48,46 +112,102 @@ class Builder {
private Class tokenizerClass;
private Reader inputReader;

/**
* Selects the tokenizer implementation by class name.
* The class is loaded immediately via {@link Class#forName(String)} and
* stored in this builder.
*
* @param tokenizerClassName the name of the tokenizer class, in the form
* accepted by {@link Class#forName(String)}
* @return this builder, to allow call chaining
* @throws ClassNotFoundException if the class cannot be located
*/
public Builder tokenizerClassName(String tokenizerClassName) throws ClassNotFoundException {
this.tokenizerClass = Class.forName(tokenizerClassName);
return this;
}

/**
* Stores the sentence splitting flag in this builder.
*
* @param splitSentences the sentence splitting flag to store
* @return this builder, to allow call chaining
*/
public Builder splitSentences(boolean splitSentences) {
this.splitSentences = splitSentences;
return this;
}

/**
* Stores the echo flag in this builder.
*
* @param echo the echo flag to store
* @return this builder, to allow call chaining
*/
public Builder setEcho(boolean echo) {
this.echo = echo;
return this;
}

/**
* Stores the offset printing flag in this builder.
*
* @param printOffsets the offset printing flag to store
* @return this builder, to allow call chaining
*/
public Builder printOffsets(boolean printOffsets) {
this.printOffsets = printOffsets;
return this;
}

/**
* Stores the token printing flag in this builder.
*
* @param printTokens the token printing flag to store
* @return this builder, to allow call chaining
*/
public Builder printTokens(boolean printTokens) {
this.printTokens = printTokens;
return this;
}

/**
* Stores the input reader in this builder.
*
* @param inputReader the input reader to store
* @return this builder, to allow call chaining
*/
public Builder inputReader(Reader inputReader) {
this.inputReader = inputReader;
return this;
}

/**
* Stores the normalization flag in this builder.
*
* @param normalize the normalization flag to store
* @return this builder, to allow call chaining
*/
public Builder normalize(boolean normalize) {
this.normalize = normalize;
return this;
}

/**
* Stores the output stream in this builder.
*
* @param outputStream the print stream to store
* @return this builder, to allow call chaining
*/
public Builder outputStream(PrintStream outputStream) {
this.outputStream = outputStream;
return this;
}

/**
* Build korap tokenizer.
*
* @return the korap tokenizer
* @throws IllegalAccessException the illegal access exception
* @throws InstantiationException the instantiation exception
*/
public KorapTokenizer build() throws IllegalAccessException, InstantiationException {
KorapTokenizer korapTokenizer = (KorapTokenizer) tokenizerClass.newInstance();
korapTokenizer.setEcho(echo);
Expand Down
11 changes: 11 additions & 0 deletions src/main/java/de/ids_mannheim/korap/tokenizer/Languages.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
package de.ids_mannheim.korap.tokenizer;

/**
* Annotation type that carries an array of strings.
* NOTE(review): presumably used to mark tokenizer implementations with the
* languages they target; confirm against its usages.
*
* @author kupietz
* @version $Id: $Id
*/
public @interface Languages {
// NOTE(review): no @Retention is declared on this annotation type, so the
// default retention policy (CLASS) applies and the annotation is not
// readable via reflection at runtime. Confirm whether RUNTIME is intended.
/**
* Returns the strings given to this annotation.
* NOTE(review): presumably language names or codes; to be confirmed.
*
* @return the annotation's string values
*/
String[] value();
}
Loading

0 comments on commit d1dc848

Please sign in to comment.