Skip to content

Commit

Permalink
Merge pull request #55 from kusumotolab/modify
Browse files Browse the repository at this point in the history
Modify
  • Loading branch information
T45K committed Jan 30, 2021
2 parents 39bd78e + 073bb9f commit 0ce284a
Show file tree
Hide file tree
Showing 36 changed files with 110 additions and 111 deletions.
4 changes: 2 additions & 2 deletions build.gradle.kts
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ dependencies {

application {
// Define the main class for the application.
mainClassName = "io.github.t45k.nil.NILMainKt"
mainClassName = "jp.ac.osaka_u.sdl.nil.NILMainKt"
}

val jar by tasks.getting(Jar::class) {
manifest {
attributes["Main-Class"] = "io.github.t45k.nil.NILMainKt"
attributes["Main-Class"] = "jp.ac.osaka_u.sdl.nil.NILMainKt"
}
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.github.t45k.nil
package jp.ac.osaka_u.sdl.nil

import java.io.File

Expand All @@ -8,8 +8,8 @@ data class NILConfig(
val minToken: Int = 50,
val gramSize: Int = 5,
val partitionNum: Int = 10,
val filteringThreshold: Int = 10,
val verifyingThreshold: Int = 70,
val filtrationThreshold: Int = 10,
val verificationThreshold: Int = 70,
val outputFileName: String = "result.csv",
val threads: Int = 0,
val isForBigCloneEval: Boolean = false,
Expand All @@ -22,8 +22,8 @@ fun parseArgs(args: Array<String>): NILConfig {
var minToken = 50
var gramSize = 5
var partitionNum = 10
var filteringThreshold = 10
var verifyingThreshold = 70
var filtrationThreshold = 10
var verificationThreshold = 70
var outputFileName: String? = null
var threads = 0
var isForBigCloneEval = false
Expand All @@ -37,8 +37,8 @@ fun parseArgs(args: Array<String>): NILConfig {
"-mit", "--min-token" -> minToken = iterator.next().toIntOrException(optionName)
"-n", "--n-gram" -> gramSize = iterator.next().toIntOrException(optionName)
"-p", "--partition-number" -> partitionNum = iterator.next().toIntOrException(optionName)
"-f", "--filtering-threshold" -> filteringThreshold = iterator.next().toIntOrException(optionName)
"-v", "--verifying-threshold" -> verifyingThreshold = iterator.next().toIntOrException(optionName)
"-f", "--filtration-threshold" -> filtrationThreshold = iterator.next().toIntOrException(optionName)
"-v", "--verification-threshold" -> verificationThreshold = iterator.next().toIntOrException(optionName)
"-o", "--output" -> outputFileName = iterator.next()
"-t", "--threads" -> threads = iterator.next().toInt()
"-bce", "--bigcloneeval" -> isForBigCloneEval = true
Expand All @@ -57,9 +57,9 @@ fun parseArgs(args: Array<String>): NILConfig {
minToken,
gramSize,
partitionNum,
filteringThreshold,
verifyingThreshold,
outputFileName ?: "result_${gramSize}_${filteringThreshold}_${verifyingThreshold}.csv",
filtrationThreshold,
verificationThreshold,
outputFileName ?: "result_${gramSize}_${filtrationThreshold}_${verificationThreshold}.csv",
threads,
isForBigCloneEval,
isForMutationInjectionFramework,
Expand All @@ -81,10 +81,10 @@ class InvalidOptionException(private val option: String) : RuntimeException() {
|-mit, --min-token${'\t'}${'\t'}${'\t'}Minimum token (default: 50)
|-n, --n-gram${'\t'}${'\t'}${'\t'}${'\t'}N of N-gram (default: 5)
|-p, --partition-number${'\t'}${'\t'}${'\t'}The number of partition (default: 10)
|-f, --filtering-threshold${'\t'}${'\t'}Filtering threshold (default: 10%)
|-v, --verifying-threshold${'\t'}${'\t'}Verifying threshold (default: 70%)
|-f, --filtration-threshold${'\t'}${'\t'}Filtration threshold (default: 10%)
|-v, --verification-threshold${'\t'}${'\t'}Verification threshold (default: 70%)
|-o, --output${'\t'}${'\t'}${'\t'}${'\t'}Output file name (default: result_{N-gram}_{filtering_threshold}_{verifying_threshold}.csv)
|-t, --thrads${'\t'}${'\t'}${'\t'}${'\t'}The number of threads used for parallel execution (default: all threads)
|-t, --threads${'\t'}${'\t'}${'\t'}${'\t'}The number of threads used for parallel execution (default: all threads)
|-bce, --bigcloneeval${'\t'}${'\t'}${'\t'}Output result feasible to BigCloneEval (default: false)
|-mif, --mutationinjectionframework${'\t'}Output result feasible to MutationInjectionFramework (default: false)
""".trimMargin()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,19 +1,19 @@
package io.github.t45k.nil
package jp.ac.osaka_u.sdl.nil

import io.github.t45k.nil.entity.HuntSzymanskiLCS
import io.github.t45k.nil.entity.InvertedIndex
import io.github.t45k.nil.entity.TokenSequence
import io.github.t45k.nil.presenter.logger.LoggerWrapperFactory
import io.github.t45k.nil.presenter.output.FormatFactory
import io.github.t45k.nil.usecase.cloneDetection.CloneDetection
import io.github.t45k.nil.usecase.cloneDetection.NGramBasedFiltration
import io.github.t45k.nil.usecase.preprocess.JavaPreprocess
import io.github.t45k.nil.usecase.cloneDetection.NGramBasedLocation
import io.github.t45k.nil.usecase.cloneDetection.LCSBasedVerification
import io.github.t45k.nil.util.parallelIfSpecified
import io.github.t45k.nil.util.toTime
import io.reactivex.rxjava3.core.Flowable
import io.reactivex.rxjava3.schedulers.Schedulers
import jp.ac.osaka_u.sdl.nil.entity.HuntSzymanskiLCS
import jp.ac.osaka_u.sdl.nil.entity.InvertedIndex
import jp.ac.osaka_u.sdl.nil.entity.TokenSequence
import jp.ac.osaka_u.sdl.nil.presenter.logger.LoggerWrapperFactory
import jp.ac.osaka_u.sdl.nil.presenter.output.FormatFactory
import jp.ac.osaka_u.sdl.nil.usecase.cloneDetection.CloneDetection
import jp.ac.osaka_u.sdl.nil.usecase.cloneDetection.LCSBasedVerification
import jp.ac.osaka_u.sdl.nil.usecase.cloneDetection.NGramBasedFiltration
import jp.ac.osaka_u.sdl.nil.usecase.cloneDetection.NGramBasedLocation
import jp.ac.osaka_u.sdl.nil.usecase.preprocess.JavaPreprocess
import jp.ac.osaka_u.sdl.nil.util.parallelIfSpecified
import jp.ac.osaka_u.sdl.nil.util.toTime
import java.io.File

class NILMain(private val config: NILConfig) {
Expand All @@ -33,8 +33,8 @@ class NILMain(private val config: NILConfig) {
logger.infoPreprocessCompletion(tokenSequences.size)

val partitionSize = (tokenSequences.size + config.partitionNum - 1) / config.partitionNum
val filteringPhase = NGramBasedFiltration(config.filteringThreshold)
val verifyingPhase = LCSBasedVerification(HuntSzymanskiLCS(), config.verifyingThreshold)
val filtrationPhase = NGramBasedFiltration(config.filtrationThreshold)
val verificationPhase = LCSBasedVerification(HuntSzymanskiLCS(), config.verificationThreshold)

File(CLONE_PAIR_FILE_NAME).bufferedWriter().use { bw ->
repeat(config.partitionNum) { i ->
Expand All @@ -44,9 +44,9 @@ class NILMain(private val config: NILConfig) {
InvertedIndex.create(partitionSize, config.gramSize, tokenSequences, startIndex)
logger.infoInvertedIndexCreationCompletion(i + 1)

val locatingPhase = NGramBasedLocation(invertedIndex)
val locationPhase = NGramBasedLocation(invertedIndex)
val cloneDetection =
CloneDetection(locatingPhase, filteringPhase, verifyingPhase, tokenSequences, config.gramSize)
CloneDetection(locationPhase, filtrationPhase, verificationPhase, tokenSequences, config.gramSize)
Flowable.range(startIndex + 1, tokenSequences.size - startIndex - 1)
.parallelIfSpecified(config.threads)
.runOn(Schedulers.computation())
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.github.t45k.nil.entity
package jp.ac.osaka_u.sdl.nil.entity

/**
* Code block is a single function.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.github.t45k.nil.entity
package jp.ac.osaka_u.sdl.nil.entity

/**
* Time complexity is O(N log (N)).
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.github.t45k.nil.entity
package jp.ac.osaka_u.sdl.nil.entity

import kotlin.math.min

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.github.t45k.nil.entity
package jp.ac.osaka_u.sdl.nil.entity

interface LCS {
/**
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.github.t45k.nil.entity
package jp.ac.osaka_u.sdl.nil.entity

/**
* NGram is a hash value of N-gram
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.github.t45k.nil.entity
package jp.ac.osaka_u.sdl.nil.entity

import kotlin.math.max

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.github.t45k.nil.entity
package jp.ac.osaka_u.sdl.nil.entity

/**
* TokenSequence is a list of each token's hash value.
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.github.t45k.nil.presenter.logger
package jp.ac.osaka_u.sdl.nil.presenter.logger

interface LoggerWrapper {
fun infoStart()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.github.t45k.nil.presenter.logger
package jp.ac.osaka_u.sdl.nil.presenter.logger

class LoggerWrapperFactory {
companion object {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,15 +1,15 @@
package io.github.t45k.nil.presenter.logger
package jp.ac.osaka_u.sdl.nil.presenter.logger

/**
* [LoggerWrapper] used when NIL is run under the Mutation Injection Framework
* (https://github.com/jeffsvajlenko/MutationInjectionFramework).
* When a clone detector is validated by that framework,
* the tool's standard output must contain only its output file name.
* For that reason every progress callback below is a no-op and only [infoEnd] prints.
*
* @param outputFileName the text that [infoEnd] writes to standard output.
*/
class MFLoggerWrapper(private val outputFileName: String) : LoggerWrapper {
override fun infoStart() {} // NOP
override fun infoPreprocessCompletion(size: Int) {} // NOP
override fun infoInvertedIndexCreationCompletion(partition: Int) {} // NOP
override fun infoCloneDetectionCompletion(partition: Int) {} // NOP
override fun infoStart() {} /* no-op */
override fun infoPreprocessCompletion(size: Int) {} /* no-op */
override fun infoInvertedIndexCreationCompletion(partition: Int) {} /* no-op */
override fun infoCloneDetectionCompletion(partition: Int) {} /* no-op */

// Writes only the output file name, via print (not println); the `time` argument is ignored.
override fun infoEnd(time: String) = print(outputFileName)
}
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.github.t45k.nil.presenter.logger
package jp.ac.osaka_u.sdl.nil.presenter.logger

import org.slf4j.Logger
import org.slf4j.LoggerFactory
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.github.t45k.nil.presenter.output
package jp.ac.osaka_u.sdl.nil.presenter.output

import java.io.File

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.github.t45k.nil.presenter.output
package jp.ac.osaka_u.sdl.nil.presenter.output

/**
* NIL's standard output format is "/path/to/file1,start_line1,end_line1,/path/to/file2,start_line2,end_line2",
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package io.github.t45k.nil.presenter.output
package jp.ac.osaka_u.sdl.nil.presenter.output

import io.github.t45k.nil.NILMain
import io.github.t45k.nil.NILMain.Companion.CODE_BLOCK_FILE_NAME
import jp.ac.osaka_u.sdl.nil.NILMain
import jp.ac.osaka_u.sdl.nil.NILMain.Companion.CODE_BLOCK_FILE_NAME
import java.io.File

abstract class Format {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
package io.github.t45k.nil.presenter.output
package jp.ac.osaka_u.sdl.nil.presenter.output

class FormatFactory {
companion object {
Expand All @@ -9,4 +9,4 @@ class FormatFactory {
CSV()
}
}
}
}
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package io.github.t45k.nil.usecase.cloneDetection
package jp.ac.osaka_u.sdl.nil.usecase.cloneDetection

import io.github.t45k.nil.entity.Id
import io.github.t45k.nil.entity.TokenSequence
import io.github.t45k.nil.entity.toNgrams
import io.reactivex.rxjava3.core.Flowable
import jp.ac.osaka_u.sdl.nil.entity.Id
import jp.ac.osaka_u.sdl.nil.entity.TokenSequence
import jp.ac.osaka_u.sdl.nil.entity.toNgrams

class CloneDetection(
private val locatingPhase: Location,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package io.github.t45k.nil.usecase.cloneDetection
package jp.ac.osaka_u.sdl.nil.usecase.cloneDetection

import io.github.t45k.nil.entity.NGramInfo
import jp.ac.osaka_u.sdl.nil.entity.NGramInfo

interface Filtration {
fun filter(nGramSize: Int, cloneCandidate: Map.Entry<NGramInfo, Int>): Boolean
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
package io.github.t45k.nil.usecase.cloneDetection
package jp.ac.osaka_u.sdl.nil.usecase.cloneDetection

import io.github.t45k.nil.entity.LCS
import io.github.t45k.nil.entity.TokenSequence
import jp.ac.osaka_u.sdl.nil.entity.LCS
import jp.ac.osaka_u.sdl.nil.entity.TokenSequence
import kotlin.math.min

class LCSBasedVerification(private val lcs: LCS, private val threshold: Int) : Verification {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package io.github.t45k.nil.usecase.cloneDetection
package jp.ac.osaka_u.sdl.nil.usecase.cloneDetection

import io.github.t45k.nil.entity.NGramInfo
import io.github.t45k.nil.entity.NGrams
import io.reactivex.rxjava3.core.Flowable
import jp.ac.osaka_u.sdl.nil.entity.NGramInfo
import jp.ac.osaka_u.sdl.nil.entity.NGrams

interface Location {
fun locate(nGrams: NGrams, index: Int): Flowable<Map.Entry<NGramInfo, Int>>
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package io.github.t45k.nil.usecase.cloneDetection
package jp.ac.osaka_u.sdl.nil.usecase.cloneDetection

import io.github.t45k.nil.entity.NGramInfo
import jp.ac.osaka_u.sdl.nil.entity.NGramInfo
import kotlin.math.min

class NGramBasedFiltration(private val threshold: Int) : Filtration {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
package io.github.t45k.nil.usecase.cloneDetection
package jp.ac.osaka_u.sdl.nil.usecase.cloneDetection

import io.github.t45k.nil.entity.InvertedIndex
import io.github.t45k.nil.entity.NGramInfo
import io.github.t45k.nil.entity.NGrams
import io.reactivex.rxjava3.core.Flowable
import io.reactivex.rxjava3.kotlin.toFlowable
import jp.ac.osaka_u.sdl.nil.entity.InvertedIndex
import jp.ac.osaka_u.sdl.nil.entity.NGramInfo
import jp.ac.osaka_u.sdl.nil.entity.NGrams

class NGramBasedLocation(private val invertedIndex: InvertedIndex) : Location {
override fun locate(nGrams: NGrams, index: Int): Flowable<Map.Entry<NGramInfo, Int>> =
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
package io.github.t45k.nil.usecase.cloneDetection
package jp.ac.osaka_u.sdl.nil.usecase.cloneDetection

import io.github.t45k.nil.entity.TokenSequence
import jp.ac.osaka_u.sdl.nil.entity.TokenSequence

interface Verification {
fun verify(tokenSequence1: TokenSequence, tokenSequence2: TokenSequence): Boolean
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
package io.github.t45k.nil.usecase.preprocess
package jp.ac.osaka_u.sdl.nil.usecase.preprocess

import io.github.t45k.nil.NILConfig
import io.github.t45k.nil.entity.CodeBlock
import io.github.t45k.nil.util.LexicalAnalyzer
import io.reactivex.rxjava3.core.BackpressureStrategy
import io.reactivex.rxjava3.core.Flowable
import io.reactivex.rxjava3.core.Observable
import jp.ac.osaka_u.sdl.nil.NILConfig
import jp.ac.osaka_u.sdl.nil.entity.CodeBlock
import jp.ac.osaka_u.sdl.nil.util.LexicalAnalyzer
import org.eclipse.core.runtime.NullProgressMonitor
import org.eclipse.jdt.core.JavaCore
import org.eclipse.jdt.core.dom.AST.JLS14
Expand Down Expand Up @@ -33,18 +33,16 @@ class JavaParser(private val tokenizer: (String) -> List<Int>, private val confi

val fileName = sourceFile.canonicalPath
object : ASTVisitor() {
override fun visit(node: MethodDeclaration?): Boolean {
node?.also {
val startLine = if (it.javadoc == null) {
compilationUnit.getLineNumber(it.startPosition)
} else {
compilationUnit.getLineNumber(node.getNodeNextToJavaDoc().startPosition)
}
val endLine = compilationUnit.getLineNumber(it.startPosition + it.length)
it.javadoc = null
if (endLine - startLine + 1 >= config.minLine && LexicalAnalyzer.countTokens(it.toString()) >= config.minToken) {
emitter.onNext(CodeBlock(fileName, startLine, endLine, tokenizer(it.toString())))
}
override fun visit(node: MethodDeclaration): Boolean {
val startLine = if (node.javadoc == null) {
compilationUnit.getLineNumber(node.startPosition)
} else {
compilationUnit.getLineNumber(node.getNodeNextToJavaDoc().startPosition)
}
val endLine = compilationUnit.getLineNumber(node.startPosition + node.length)
node.javadoc = null
if (endLine - startLine + 1 >= config.minLine && LexicalAnalyzer.countTokens(node.toString()) >= config.minToken) {
emitter.onNext(CodeBlock(fileName, startLine, endLine, tokenizer(node.toString())))
}
return false
}
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
package io.github.t45k.nil.usecase.preprocess
package jp.ac.osaka_u.sdl.nil.usecase.preprocess

import io.github.t45k.nil.NILConfig
import io.github.t45k.nil.entity.CodeBlock
import io.reactivex.rxjava3.core.Flowable
import io.reactivex.rxjava3.kotlin.toFlowable
import jp.ac.osaka_u.sdl.nil.NILConfig
import jp.ac.osaka_u.sdl.nil.entity.CodeBlock
import java.io.File

class JavaPreprocess(private val config: NILConfig) : Preprocess(config.threads) {
Expand Down
Loading

0 comments on commit 0ce284a

Please sign in to comment.