forked from pool/languagetool
new package OBS-URL: https://build.opensuse.org/request/show/789637 OBS-URL: https://build.opensuse.org/package/show/Java:packages/languagetool?expand=0&rev=1
1029 lines
42 KiB
Diff
1029 lines
42 KiB
Diff
--- languagetool-4.8/languagetool-core/pom.xml 2019-12-27 11:17:28.000000000 +0100
|
|
+++ languagetool-4.8/languagetool-core/pom.xml 2020-01-07 09:32:01.278033500 +0100
|
|
@@ -106,6 +106,11 @@
|
|
<version>28.1-jre</version>
|
|
</dependency>
|
|
<dependency>
|
|
+ <groupId>net.java.dev.jna</groupId>
|
|
+ <artifactId>jna</artifactId>
|
|
+ <version>4.5.2</version>
|
|
+ </dependency>
|
|
+ <dependency>
|
|
<groupId>org.carrot2</groupId>
|
|
<artifactId>morfologik-fsa</artifactId>
|
|
<version>${morfologik.version}</version>
|
|
@@ -218,13 +223,6 @@
|
|
<artifactId>slf4j-api</artifactId>
|
|
<version>1.7.25</version>
|
|
</dependency>
|
|
-
|
|
- <dependency>
|
|
- <groupId>com.gitlab.dumonts</groupId>
|
|
- <artifactId>hunspell</artifactId>
|
|
- <version>1.1.0</version>
|
|
- </dependency>
|
|
-
|
|
<dependency>
|
|
<groupId>ch.qos.logback</groupId>
|
|
<artifactId>logback-classic</artifactId>
|
|
--- languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/CompoundAwareHunspellRule.java 2019-12-27 11:17:28.000000000 +0100
|
|
+++ languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/CompoundAwareHunspellRule.java 2020-01-07 09:32:01.278033500 +0100
|
|
@@ -143,7 +143,7 @@
|
|
int partCount = 0;
|
|
List<String> candidates = new ArrayList<>();
|
|
for (String part : parts) {
|
|
- if (!hunspell.spell(part)) {
|
|
+ if (hunspellDict.misspelled(part)) {
|
|
// assume noun, so use uppercase:
|
|
boolean doUpperCase = partCount > 0 && !StringTools.startsWithUppercase(part);
|
|
List<String> suggestions = morfoSpeller.getSuggestions(doUpperCase ? StringTools.uppercaseFirstChar(part) : part);
|
|
@@ -213,7 +213,7 @@
|
|
String[] words = tokenizeText(wordOrPhrase);
|
|
boolean wordIsOkay = true;
|
|
for (String word : words) {
|
|
- if (!hunspell.spell(word)) {
|
|
+ if (hunspellDict.misspelled(word)) {
|
|
wordIsOkay = false;
|
|
break;
|
|
}
|
|
--- languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/Hunspell.java 2019-12-27 11:17:28.000000000 +0100
|
|
+++ languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/Hunspell.java 2020-01-07 09:32:01.278033500 +0100
|
|
@@ -1,132 +1,418 @@
|
|
package org.languagetool.rules.spelling.hunspell;
|
|
|
|
-import dumonts.hunspell.bindings.HunspellLibrary;
|
|
-import org.bridj.Pointer;
|
|
-
|
|
-import java.io.Closeable;
|
|
+import java.io.File;
|
|
+import java.io.FileNotFoundException;
|
|
+import java.io.FileOutputStream;
|
|
import java.io.IOException;
|
|
import java.io.InputStream;
|
|
-import java.nio.charset.Charset;
|
|
-import java.nio.file.Files;
|
|
-import java.nio.file.Path;
|
|
-import java.nio.file.StandardCopyOption;
|
|
-import java.util.*;
|
|
-import java.util.stream.Collectors;
|
|
-
|
|
-public class Hunspell implements Closeable {
|
|
- private final Pointer<HunspellLibrary.Hunhandle> handle;
|
|
- private final Charset charset;
|
|
-
|
|
- private static final Map<LanguageAndPath, Hunspell> map = new HashMap<>();
|
|
-
|
|
- static class LanguageAndPath {
|
|
- private final Path dictionary;
|
|
- private final Path affix;
|
|
- LanguageAndPath(Path dictionary, Path affix) {
|
|
- this.dictionary = Objects.requireNonNull(dictionary);
|
|
- this.affix = Objects.requireNonNull(affix);
|
|
- }
|
|
- @Override
|
|
- public boolean equals(Object o) {
|
|
- if (this == o) return true;
|
|
- if (o == null || getClass() != o.getClass()) return false;
|
|
- LanguageAndPath that = (LanguageAndPath) o;
|
|
- return Objects.equals(dictionary, that.dictionary) &&
|
|
- Objects.equals(affix, that.affix);
|
|
- }
|
|
- @Override
|
|
- public int hashCode() {
|
|
- return Objects.hash(dictionary, affix);
|
|
- }
|
|
+import java.io.UnsupportedEncodingException;
|
|
+import java.nio.CharBuffer;
|
|
+import java.nio.charset.CharacterCodingException;
|
|
+import java.util.ArrayList;
|
|
+import java.util.Arrays;
|
|
+import java.util.HashMap;
|
|
+import java.util.List;
|
|
+import java.util.Scanner;
|
|
+
|
|
+import com.sun.jna.Native;
|
|
+import com.sun.jna.Pointer;
|
|
+import com.sun.jna.ptr.PointerByReference;
|
|
+
|
|
+/**
|
|
+ * The simple hunspell library frontend which takes care of creating
|
|
+ * and singleton'ing the library instance (no need to load it more than once
|
|
+ * per process).
|
|
+ *
|
|
+ * The Hunspell java bindings are licensed under the same terms as Hunspell itself (GPL/LGPL/MPL tri-license),
|
|
+ * see the file COPYING.txt in the root of the distribution for the exact terms.
|
|
+ *
|
|
+ * @author Flemming Frandsen (flfr at stibo dot com)
|
|
+ */
|
|
+
|
|
+public class Hunspell {
|
|
+
|
|
+ /**
|
|
+ * The Singleton instance of Hunspell
|
|
+ */
|
|
+ private static Hunspell hunspell = null;
|
|
+
|
|
+ /**
|
|
+ * The native library instance, created by JNA.
|
|
+ */
|
|
+ private HunspellLibrary hsl = null;
|
|
+
|
|
+ /**
|
|
+ * The library file that was loaded.
|
|
+ */
|
|
+ private String libFile;
|
|
+
|
|
+ /**
|
|
+ * Returns the singleton Hunspell instance, looking for the native lib in the
|
|
+ * default directories
|
|
+ */
|
|
+ public static Hunspell getInstance() throws UnsatisfiedLinkError, UnsupportedOperationException {
|
|
+ return getInstance(null);
|
|
+ }
|
|
+
|
|
+ /**
|
|
+ * Returns the singleton Hunspell instance, looking for the native lib in
|
|
+ * the directory specified (libDir is ignored once the instance exists).
|
|
+ *
|
|
+ * @param libDir Optional absolute directory where the native lib can be found.
|
|
+ */
|
|
+ public static synchronized Hunspell getInstance(String libDir) throws UnsatisfiedLinkError, UnsupportedOperationException {
|
|
+ if (hunspell != null) {
|
|
+ return hunspell;
|
|
}
|
|
|
|
- public Hunspell(Path dictionary, Path affix) {
|
|
- Pointer<Byte> aff = Pointer.pointerToCString(affix.toString());
|
|
- Pointer<Byte> dic = Pointer.pointerToCString(dictionary.toString());
|
|
- handle = HunspellLibrary.Hunspell_create(aff, dic);
|
|
- charset = Charset.forName(HunspellLibrary.Hunspell_get_dic_encoding(handle).getCString());
|
|
- if (this.handle == null) {
|
|
- throw new RuntimeException("Unable to create Hunspell instance");
|
|
- }
|
|
+ hunspell = new Hunspell(libDir);
|
|
+ return hunspell;
|
|
}
|
|
|
|
- public synchronized static Hunspell getInstance(Path dictionary, Path affix) {
|
|
- LanguageAndPath key = new LanguageAndPath(dictionary, affix);
|
|
- Hunspell hunspell = map.get(key);
|
|
- if (hunspell != null) {
|
|
- return hunspell;
|
|
+ protected void tryLoad(String libFile) throws UnsupportedOperationException {
|
|
+ hsl = (HunspellLibrary)Native.loadLibrary(libFile, HunspellLibrary.class);
|
|
}
|
|
- Hunspell newHunspell = new Hunspell(dictionary, affix);
|
|
- map.put(key, newHunspell);
|
|
- return newHunspell;
|
|
+
|
|
+
|
|
+ /**
|
|
+ * Constructor for the library, loads the native lib.
|
|
+ *
|
|
+ * Loading is attempted in the following order, stopping at the first success:
|
|
+ * 1) If libDir is given, the platform-specific file name inside that directory;
|
|
+ * otherwise the bare library name (no "lib" prefix or file extension).
|
|
+ * 2) The library is searched for in the classpath, extracted to a temp file and loaded.
|
|
+ *
|
|
+ * @param libDir Optional absolute directory where the native lib can be found.
|
|
+ * @throws UnsupportedOperationException if the OS or architecture is simply not supported.
|
|
+ */
|
|
+ protected Hunspell(String libDir) throws UnsatisfiedLinkError, UnsupportedOperationException {
|
|
+
|
|
+ libFile = libDir != null ? libDir+"/"+libName() : libNameBare();
|
|
+ try {
|
|
+ hsl = (HunspellLibrary)Native.loadLibrary(libFile, HunspellLibrary.class);
|
|
+ } catch (UnsatisfiedLinkError urgh) {
|
|
+
|
|
+ // Oh dear, the library was not found in the file system, let's try the classpath
|
|
+ libFile = libName();
|
|
+ InputStream is = Hunspell.class.getResourceAsStream("/"+libFile);
|
|
+ if (is == null) {
|
|
+ throw new UnsatisfiedLinkError("Can't find "+libFile+
|
|
+ " in the filesystem nor in the classpath\n"+
|
|
+ urgh);
|
|
}
|
|
|
|
- public static Hunspell forDictionaryInResources(String language, String resourcePath) {
|
|
+ // Extract the library from the classpath into a temp file.
|
|
+ File lib;
|
|
+ FileOutputStream fos = null;
|
|
try {
|
|
- ClassLoader loader = Hunspell.class.getClassLoader();
|
|
- InputStream dictionaryStream = loader.getResourceAsStream(resourcePath + language + ".dic");
|
|
- InputStream affixStream = loader.getResourceAsStream(resourcePath + language + ".aff");
|
|
- if (dictionaryStream == null || affixStream == null) {
|
|
- throw new RuntimeException("Could not find dictionary for language \"" + language + "\" in classpath");
|
|
- }
|
|
- Path dictionary = Files.createTempFile(language, ".dic");
|
|
- Path affix = Files.createTempFile(language, ".aff");
|
|
- Files.copy(dictionaryStream, dictionary, StandardCopyOption.REPLACE_EXISTING);
|
|
- Files.copy(affixStream, affix, StandardCopyOption.REPLACE_EXISTING);
|
|
- return new Hunspell(dictionary, affix);
|
|
+ lib = File.createTempFile("jna", "."+libFile);
|
|
+ lib.deleteOnExit();
|
|
+ fos = new FileOutputStream(lib);
|
|
+ int count;
|
|
+ byte[] buf = new byte[1024];
|
|
+ while ((count = is.read(buf, 0, buf.length)) > 0) {
|
|
+ fos.write(buf, 0, count);
|
|
+ }
|
|
+
|
|
} catch (IOException e) {
|
|
- throw new RuntimeException("Could not create temporary dictionaries for language \"" + language + "\"", e);
|
|
+ throw new Error("Failed to create temporary file for "+libFile, e);
|
|
+
|
|
+ } finally {
|
|
+ try { is.close(); } catch(IOException e) { }
|
|
+ if (fos != null) {
|
|
+ try { fos.close(); } catch(IOException e) { }
|
|
+ }
|
|
+ }
|
|
+ //System.out.println("Loading temp lib: "+lib.getAbsolutePath());
|
|
+ hsl = (HunspellLibrary)Native.loadLibrary(lib.getAbsolutePath(), HunspellLibrary.class);
|
|
+ }
|
|
+ }
|
|
+
|
|
+ public String getLibFile() {
|
|
+ return libFile;
|
|
+ }
|
|
+
|
|
+ /**
|
|
+ * Calculate the filename of the native hunspell lib.
|
|
+ * The files have completely different names to allow them to live
|
|
+ * in the same directory and avoid confusion.
|
|
+ */
|
|
+ public static String libName() throws UnsupportedOperationException {
|
|
+ String os = System.getProperty("os.name").toLowerCase();
|
|
+ if (os.startsWith("windows")) {
|
|
+ return libNameBare()+".dll";
|
|
+
|
|
+ } else if (os.startsWith("mac os x")) {
|
|
+ // return libNameBare()+".dylib";
|
|
+ return libNameBare()+".jnilib";
|
|
+
|
|
+ } else {
|
|
+ return "lib"+libNameBare()+".so";
|
|
+ }
|
|
+ }
|
|
+
|
|
+ public static String libNameBare() throws UnsupportedOperationException {
|
|
+ String os = System.getProperty("os.name").toLowerCase();
|
|
+ String arch = System.getProperty("os.arch").toLowerCase();
|
|
+
|
|
+ // Annoying that Java doesn't have consistent names for the arch types:
|
|
+ boolean x86 = arch.equals("x86") || arch.equals("i386") || arch.equals("i686");
|
|
+ boolean amd64= arch.equals("x86_64") || arch.equals("amd64") || arch.equals("ia64n");
|
|
+
|
|
+ if (os.startsWith("windows")) {
|
|
+ if (x86) {
|
|
+ return "hunspell-win-x86-32";
|
|
+ }
|
|
+ if (amd64) {
|
|
+ return "hunspell-win-x86-64";
|
|
+ }
|
|
+
|
|
+ } else if (os.startsWith("mac os x")) {
|
|
+ if (x86) {
|
|
+ return "hunspell-darwin-x86-32";
|
|
+ }
|
|
+ if (amd64) {
|
|
+ return "hunspell-darwin-x86-64";
|
|
+ }
|
|
+ if (arch.equals("ppc")) {
|
|
+ return "hunspell-darwin-ppc-32";
|
|
+ }
|
|
+
|
|
+ } else if (os.startsWith("linux")) {
|
|
+ if (x86) {
|
|
+ return "hunspell-linux-x86-32";
|
|
+ }
|
|
+ if (amd64) {
|
|
+ return "hunspell-linux-x86-64";
|
|
+ }
|
|
+
|
|
+ } else if (os.startsWith("sunos")) {
|
|
+ //if (arch.equals("sparc")) {
|
|
+ // return "hunspell-sunos-sparc-64";
|
|
+ //}
|
|
+
|
|
+ } else if (os.startsWith("freebsd")) {
|
|
+ // Patch by Koen Vervloesem - FreeBSD is not supported yet, but: "... not a real solution, but
|
|
+ // having this fixed makes it easier for me to build new LanguageTool releases without always
|
|
+ // having to apply a local patch first."
|
|
+ if (x86) {
|
|
+ return "hunspell-freebsd-x86-32";
|
|
+ }
|
|
+ if (amd64) {
|
|
+ return "hunspell-freebsd-x86-64";
|
|
+ }
|
|
+
|
|
+ } else if (os.startsWith("aix")) {
|
|
+ // added by Martin Kallinger (https://github.com/languagetool-org/languagetool/pull/1090)
|
|
+ return "hunspell-ppc64";
|
|
}
|
|
+
|
|
+ throw new UnsupportedOperationException("Unknown OS/arch: "+os+"/"+arch);
|
|
}
|
|
|
|
- public static Hunspell forDictionaryInResources(String language) {
|
|
- return forDictionaryInResources(language, "");
|
|
+ /**
|
|
+ * This is the cache where we keep the already loaded dictionaries around
|
|
+ */
|
|
+ private HashMap<String, Dictionary> map = new HashMap<>();
|
|
+
|
|
+
|
|
+ private static CharBuffer ensureCapacity(CharBuffer buffer, int capacity) {
|
|
+ if (buffer == null || buffer.capacity() < capacity) {
|
|
+ buffer = CharBuffer.allocate(capacity);
|
|
+ }
|
|
+ return buffer;
|
|
}
|
|
|
|
- public boolean spell(String word) {
|
|
- if (handle == null) {
|
|
- throw new RuntimeException("Attempt to use hunspell instance after closing");
|
|
+ /**
|
|
+ * Gets an instance of the dictionary.
|
|
+ *
|
|
+ * @param baseFileName the base name of the dictionary,
|
|
+ * passing /dict/da_DK means that the files /dict/da_DK.dic
|
|
+ * and /dict/da_DK.aff get loaded
|
|
+ */
|
|
+ public Dictionary getDictionary(String baseFileName)
|
|
+ throws IOException {
|
|
+
|
|
+ if (map.containsKey(baseFileName)) {
|
|
+ return map.get(baseFileName);
|
|
+
|
|
+ } else {
|
|
+ Dictionary d = new Dictionary(baseFileName);
|
|
+ map.put(baseFileName, d);
|
|
+ return d;
|
|
+ }
|
|
+ }
|
|
+
|
|
+ /**
|
|
+ * Removes a dictionary from the internal cache
|
|
+ *
|
|
+ * @param baseFileName the base name of the dictionary, as passed to
|
|
+ * getDictionary()
|
|
+ */
|
|
+ public void destroyDictionary(String baseFileName) {
|
|
+ if (map.containsKey(baseFileName)) {
|
|
+ map.remove(baseFileName);
|
|
+ }
|
|
}
|
|
- @SuppressWarnings("unchecked")
|
|
- Pointer<Byte> str = (Pointer<Byte>) Pointer.pointerToString(word, Pointer.StringType.C, charset);
|
|
- int result = HunspellLibrary.Hunspell_spell(handle, str);
|
|
- return result != 0;
|
|
+
|
|
+ /**
|
|
+ * Class representing a single dictionary.
|
|
+ */
|
|
+ public class Dictionary {
|
|
+ /**
|
|
+ * The pointer to the hunspell object as returned by the hunspell
|
|
+ * constructor.
|
|
+ */
|
|
+ private Pointer hunspellDict = null;
|
|
+
|
|
+ /**
|
|
+ * The encoding used by this dictionary
|
|
+ */
|
|
+ private String encoding;
|
|
+
|
|
+ /*
|
|
+ * the tokenization characters
|
|
+ */
|
|
+ private final String wordChars;
|
|
+
|
|
+ /**
|
|
+ * Creates an instance of the dictionary.
|
|
+ * @param baseFileName the base name of the dictionary,
|
|
+ */
|
|
+ Dictionary(String baseFileName) throws IOException {
|
|
+ File dic = new File(baseFileName + ".dic");
|
|
+ File aff = new File(baseFileName + ".aff");
|
|
+
|
|
+ if (!dic.canRead() || !aff.canRead()) {
|
|
+ throw new FileNotFoundException("The dictionary files "+
|
|
+ baseFileName+
|
|
+ "(.aff|.dic) could not be read");
|
|
}
|
|
|
|
- public void add(String word) {
|
|
- if (handle == null) {
|
|
- throw new RuntimeException("Attempt to use hunspell instance after closing");
|
|
+ hunspellDict = hsl.Hunspell_create(aff.toString(), dic.toString());
|
|
+ encoding = hsl.Hunspell_get_dic_encoding(hunspellDict);
|
|
+
|
|
+ //hunspell uses non-standard names of charsets
|
|
+ if ("microsoft1251".equals(encoding)) {
|
|
+ encoding = "windows-1251";
|
|
+ } else if ("ISCII-DEVANAGARI".equals(encoding)) {
|
|
+ encoding = "ISCII91";
|
|
}
|
|
- @SuppressWarnings("unchecked")
|
|
- Pointer<Byte> str = (Pointer<Byte>) Pointer.pointerToString(word, Pointer.StringType.C, charset);
|
|
- HunspellLibrary.Hunspell_add(handle, str);
|
|
+
|
|
+ wordChars = getWordCharsFromFile(aff);
|
|
}
|
|
|
|
- public List<String> suggest(String word) {
|
|
- // Create pointer to native string
|
|
- @SuppressWarnings("unchecked")
|
|
- Pointer<Byte> str = (Pointer<Byte>) Pointer.pointerToString(word, Pointer.StringType.C, charset);
|
|
- // Create pointer to native string array
|
|
- Pointer<Pointer<Pointer<Byte>>> nativeSuggestionArray = Pointer.allocatePointerPointer(Byte.class);
|
|
- // Hunspell will allocate the array and fill it with suggestions
|
|
- int suggestionCount = HunspellLibrary.Hunspell_suggest(handle, nativeSuggestionArray, str);
|
|
- if (suggestionCount == 0) {
|
|
- // Return early and don't try to free the array
|
|
- return new ArrayList<>();
|
|
+ /**
|
|
+ * Deallocate the dictionary.
|
|
+ */
|
|
+ public void destroy() {
|
|
+ if (hsl != null && hunspellDict != null) {
|
|
+ hsl.Hunspell_destroy(hunspellDict);
|
|
+ hunspellDict = null;
|
|
+ }
|
|
}
|
|
- // Ask bridj for a `java.util.List` that wraps `nativeSuggestionArray`
|
|
- List<Pointer<Byte>> nativeSuggestionList = nativeSuggestionArray.get().validElements(suggestionCount).asList();
|
|
- // Convert C Strings to java strings
|
|
- List<String> suggestions = nativeSuggestionList.stream().map((p) -> p.getStringAtOffset(0, Pointer.StringType.C, charset)).collect(Collectors.toList());
|
|
|
|
- // We can free the underlying buffer now because Java's `String` owns it's own memory
|
|
- HunspellLibrary.Hunspell_free_list(handle, nativeSuggestionArray, suggestionCount);
|
|
- return suggestions;
|
|
+ /**
|
|
+ * Used to query what are word-characters
|
|
+ * @return A string composed of characters that are parts of words,
|
|
+ * even if they are not alphabetic.
|
|
+ */
|
|
+ public String getWordChars() {
|
|
+ return wordChars;
|
|
}
|
|
|
|
- public void close() {
|
|
- if (handle != null) {
|
|
- HunspellLibrary.Hunspell_destroy(handle);
|
|
+ /**
|
|
+ * Check if a word is misspelled
|
|
+ *
|
|
+ * @param word The word to check.
|
|
+ * @return true if the <code>word</code> is not correctly spelled or the dictionary encoding is unsupported
|
|
+ */
|
|
+ public boolean misspelled(String word) {
|
|
+ try {
|
|
+ final byte[] wordAsBytes = stringToBytes(word);
|
|
+ if (wordAsBytes.length == 0 && word.length() > 0) {
|
|
+ return true;
|
|
+ }
|
|
+ return (hsl.Hunspell_spell(hunspellDict, wordAsBytes) == 0);
|
|
+ } catch (UnsupportedEncodingException e) {
|
|
+ return true;
|
|
}
|
|
}
|
|
+
|
|
+ /**
|
|
+ * Convert a Java string to a zero terminated byte array, in the
|
|
+ * encoding of the dictionary, as expected by the hunspell functions.
|
|
+ */
|
|
+ protected byte[] stringToBytes(String str) throws UnsupportedEncodingException {
|
|
+ byte[] strBytes = str.getBytes(encoding);
|
|
+ byte[] zeroTerminated = Arrays.copyOf(strBytes, strBytes.length + 1);
|
|
+ zeroTerminated[zeroTerminated.length - 1] = '\u0000';
|
|
+ return zeroTerminated;
|
|
+ }
|
|
+
|
|
+ /**
|
|
+ * Returns a list of suggestions
|
|
+ *
|
|
+ * @param word The word to check and offer suggestions for
|
|
+ */
|
|
+ public List<String> suggest(String word) throws CharacterCodingException {
|
|
+ List<String> res = new ArrayList<>();
|
|
+ try {
|
|
+ int suggestionsCount = 0;
|
|
+ PointerByReference suggestions = new PointerByReference();
|
|
+ final byte[] wordAsBytes = stringToBytes(word);
|
|
+ if (wordAsBytes.length == 0 && word.length() > 0) {
|
|
+ return res;
|
|
+ }
|
|
+ suggestionsCount = hsl.Hunspell_suggest(
|
|
+ hunspellDict, suggestions, stringToBytes(word));
|
|
+ if (suggestionsCount == 0) {
|
|
+ return res;
|
|
+ }
|
|
+
|
|
+ // Get each of the suggestions out of the pointer array.
|
|
+ Pointer[] pointerArray = suggestions.getValue().
|
|
+ getPointerArray(0, suggestionsCount);
|
|
+
|
|
+ for (int i=0; i<suggestionsCount; i++) {
|
|
+ long len = pointerArray[i].indexOf(0, (byte)0);
|
|
+ if (len != -1) {
|
|
+ if (len > Integer.MAX_VALUE) {
|
|
+ throw new RuntimeException(
|
|
+ "String improperly terminated: " + len);
|
|
+ }
|
|
+ byte[] data = pointerArray[i].getByteArray(0, (int)len);
|
|
+
|
|
+ res.add(new String(data, encoding));
|
|
+ }
|
|
+ }
|
|
+
|
|
+ } catch (UnsupportedEncodingException ex) { } // Shouldn't happen...
|
|
+
|
|
+ return res;
|
|
+ }
|
|
+
|
|
+ private String getWordCharsFromFile(final File affixFile) throws IOException {
|
|
+ String affixWordChars = "";
|
|
+ try (Scanner scanner = new Scanner(affixFile, encoding)) {
|
|
+ while (scanner.hasNextLine()) {
|
|
+ final String line = scanner.nextLine().trim();
|
|
+ if (line.startsWith("WORDCHARS ")) {
|
|
+ affixWordChars = line.substring("WORDCHARS ".length());
|
|
+ }
|
|
+ }
|
|
+ }
|
|
+ return affixWordChars;
|
|
+ }
|
|
+
|
|
+ /**
|
|
+ * Adds a word to the runtime dictionary.
|
|
+ * @param word Word to be added.
|
|
+ */
|
|
+ public void addWord(final String word) throws UnsupportedEncodingException {
|
|
+ hsl.Hunspell_add(hunspellDict, stringToBytes(word));
|
|
+ }
|
|
+
|
|
+ }
|
|
+
|
|
}
|
|
--- languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/HunspellLibrary.java 1970-01-01 01:00:00.000000000 +0100
|
|
+++ languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/HunspellLibrary.java 2020-01-07 09:32:01.278033500 +0100
|
|
@@ -0,0 +1,67 @@
|
|
+package org.languagetool.rules.spelling.hunspell;
|
|
+
|
|
+import com.sun.jna.Library;
|
|
+import com.sun.jna.Pointer;
|
|
+import com.sun.jna.ptr.PointerByReference;
|
|
+
|
|
+/**
|
|
+ * Functions from $hunspell/src/hunspell/hunspell.h
|
|
+ *
|
|
+ * The Hunspell java bindings are licensed under the same terms as Hunspell itself (GPL/LGPL/MPL tri-license),
|
|
+ * see the file COPYING.txt in the root of the distribution for the exact terms.
|
|
+ *
|
|
+ * @author Flemming Frandsen (flfr at stibo dot com)
|
|
+ */
|
|
+
|
|
+public interface HunspellLibrary extends Library {
|
|
+
|
|
+ /**
|
|
+ * Create the hunspell instance
|
|
+ * @param affpath The affix file
|
|
+ * @param dpath The dictionary file
|
|
+ * @return The hunspell object
|
|
+ */
|
|
+ public Pointer Hunspell_create(String affpath, String dpath);
|
|
+
|
|
+ /**
|
|
+ * Destroy the hunspell instance.
|
|
+ * @param pHunspell The Hunspell object returned by Hunspell_create
|
|
+ */
|
|
+ public void Hunspell_destroy(Pointer pHunspell);
|
|
+
|
|
+ /**
|
|
+ * spell(word) - spellcheck word
|
|
+ * @param pHunspell The Hunspell object returned by Hunspell_create
|
|
+ * @param word The word to spellcheck.
|
|
+ * @return 0 = bad word, not 0 = good word
|
|
+ */
|
|
+ public int Hunspell_spell(Pointer pHunspell, byte[] word);
|
|
+
|
|
+ /**
|
|
+ * Get the dictionary encoding
|
|
+ * @param pHunspell : The Hunspell object returned by Hunspell_create
|
|
+ * @return The encoding name
|
|
+ */
|
|
+ public String Hunspell_get_dic_encoding(Pointer pHunspell);
|
|
+
|
|
+ /**
|
|
+ * Search suggestions
|
|
+ * @param pHunspell The Hunspell object returned by Hunspell_create
|
|
+ * @param slst
|
|
+ * input: pointer to an array of strings pointer and the (bad) word
|
|
+ * array of strings pointer (here *slst) may not be initialized
|
|
+ * output: number of suggestions in string array, and suggestions in
|
|
+ * a newly allocated array of strings (*slst will be NULL when the number
|
|
+ * of suggestions equals 0.)
|
|
+ * @param word The word to offer suggestions for.
|
|
+ */
|
|
+ public int Hunspell_suggest(Pointer pHunspell, PointerByReference slst, byte[] word);
|
|
+
|
|
+ /**
|
|
+ * Add a word to the run-time dictionary.
|
|
+ * @param pHunspell The Hunspell object returned by Hunspell_create
|
|
+ * @param word The word added to the runtime dictionary.
|
|
+ */
|
|
+ public int Hunspell_add(Pointer pHunspell, byte[] word);
|
|
+
|
|
+}
|
|
--- languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java 2019-12-27 11:17:28.000000000 +0100
|
|
+++ languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/hunspell/HunspellRule.java 2020-01-07 09:32:01.278033500 +0100
|
|
@@ -27,9 +27,12 @@
|
|
import java.net.URISyntaxException;
|
|
import java.net.URL;
|
|
import java.nio.charset.StandardCharsets;
|
|
-import java.nio.file.Path;
|
|
-import java.nio.file.Paths;
|
|
-import java.util.*;
|
|
+import java.util.ArrayList;
|
|
+import java.util.Arrays;
|
|
+import java.util.Collections;
|
|
+import java.util.List;
|
|
+import java.util.Queue;
|
|
+import java.util.ResourceBundle;
|
|
import java.util.concurrent.ConcurrentLinkedQueue;
|
|
import java.util.regex.Pattern;
|
|
import java.util.stream.Collectors;
|
|
@@ -70,7 +73,7 @@
|
|
|
|
protected final SuggestionsOrderer suggestionsOrderer;
|
|
protected boolean needsInit = true;
|
|
- protected Hunspell hunspell = null;
|
|
+ protected Hunspell.Dictionary hunspellDict = null;
|
|
|
|
private static final ConcurrentLinkedQueue<String> activeChecks = new ConcurrentLinkedQueue<>();
|
|
private static final String NON_ALPHABETIC = "[^\\p{L}]";
|
|
@@ -141,7 +144,7 @@
|
|
if (needsInit) {
|
|
init();
|
|
}
|
|
- if (hunspell == null) {
|
|
+ if (hunspellDict == null) {
|
|
// some languages might not have a dictionary, be silent about it
|
|
return toRuleMatchArray(ruleMatches);
|
|
}
|
|
@@ -297,7 +300,7 @@
|
|
}
|
|
return (
|
|
isAlphabetic && !"--".equals(word)
|
|
- && (hunspell != null && !hunspell.spell(word))
|
|
+ && (hunspellDict != null && hunspellDict.misspelled(word))
|
|
&& !ignoreWord(word)
|
|
)
|
|
|| isProhibited(cutOffDot(word));
|
|
@@ -310,7 +313,7 @@
|
|
if (needsInit) {
|
|
init();
|
|
}
|
|
- return hunspell.suggest(word);
|
|
+ return hunspellDict.suggest(word);
|
|
}
|
|
|
|
protected List<String> sortSuggestionByQuality(String misspelling, List<String> suggestions) {
|
|
@@ -368,33 +371,20 @@
|
|
String shortDicPath = getDictFilenameInResources(langCountry);
|
|
String wordChars = "";
|
|
// set dictionary only if there are dictionary files:
|
|
- Path affPath = null;
|
|
if (JLanguageTool.getDataBroker().resourceExists(shortDicPath)) {
|
|
String path = getDictionaryPath(langCountry, shortDicPath);
|
|
if ("".equals(path)) {
|
|
- hunspell = null;
|
|
+ hunspellDict = null;
|
|
} else {
|
|
- affPath = Paths.get(path + ".aff");
|
|
- hunspell = Hunspell.getInstance(Paths.get(path + ".dic"), affPath);
|
|
+ hunspellDict = Hunspell.getInstance().getDictionary(path);
|
|
addIgnoreWords();
|
|
}
|
|
} else if (new File(shortDicPath + ".dic").exists()) {
|
|
// for dynamic languages
|
|
- affPath = Paths.get(shortDicPath + ".aff");
|
|
- hunspell = Hunspell.getInstance(Paths.get(shortDicPath + ".dic"), affPath);
|
|
+ hunspellDict = Hunspell.getInstance().getDictionary(shortDicPath);
|
|
}
|
|
- if (affPath != null) {
|
|
- Scanner sc = new Scanner(affPath);
|
|
- while (sc.hasNextLine()) {
|
|
- String line = sc.nextLine();
|
|
- if (line.startsWith("WORDCHARS ")) {
|
|
- String wordCharsFromAff = line.substring("WORDCHARS ".length());
|
|
- //System.out.println("#" + wordCharsFromAff+ "#");
|
|
- wordChars = "(?![" + wordCharsFromAff.replace("-", "\\-") + "])";
|
|
- break;
|
|
- }
|
|
- }
|
|
-
|
|
+ if (hunspellDict != null && !hunspellDict.getWordChars().isEmpty()) {
|
|
+ wordChars = "(?![" + hunspellDict.getWordChars().replace("-", "\\-") + "])";
|
|
}
|
|
nonWordPattern = Pattern.compile(wordChars + NON_ALPHABETIC);
|
|
needsInit = false;
|
|
@@ -406,13 +396,13 @@
|
|
}
|
|
|
|
private void addIgnoreWords() throws IOException {
|
|
- wordsToBeIgnored.add(SpellingCheckRule.LANGUAGETOOL);
|
|
- wordsToBeIgnored.add(SpellingCheckRule.LANGUAGETOOLER);
|
|
+ hunspellDict.addWord(SpellingCheckRule.LANGUAGETOOL);
|
|
+ hunspellDict.addWord(SpellingCheckRule.LANGUAGETOOLER);
|
|
URL ignoreUrl = JLanguageTool.getDataBroker().getFromResourceDirAsUrl(getIgnoreFileName());
|
|
List<String> ignoreLines = Resources.readLines(ignoreUrl, StandardCharsets.UTF_8);
|
|
for (String ignoreLine : ignoreLines) {
|
|
if (!ignoreLine.startsWith("#")) {
|
|
- wordsToBeIgnored.add(ignoreLine);
|
|
+ hunspellDict.addWord(ignoreLine);
|
|
}
|
|
}
|
|
}
|
|
--- languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/SpellingCheckRule.java 2019-12-27 11:17:28.000000000 +0100
|
|
+++ languagetool-4.8/languagetool-core/src/main/java/org/languagetool/rules/spelling/SpellingCheckRule.java 2020-01-07 09:32:01.278033500 +0100
|
|
@@ -81,6 +81,7 @@
|
|
private static final Comparator<String> STRING_LENGTH_COMPARATOR = Comparator.comparingInt(String::length);
|
|
|
|
private final UserConfig userConfig;
|
|
+ private final Set<String> wordsToBeIgnored = new HashSet<>();
|
|
private final Set<String> wordsToBeProhibited = new HashSet<>();
|
|
private final List<RuleWithLanguage> altRules;
|
|
|
|
@@ -90,7 +91,6 @@
|
|
private List<DisambiguationPatternRule> antiPatterns = new ArrayList<>();
|
|
private boolean considerIgnoreWords = true;
|
|
private boolean convertsCase = false;
|
|
- protected final Set<String> wordsToBeIgnored = new HashSet<>();
|
|
protected int ignoreWordsWithLength = 0;
|
|
|
|
public SpellingCheckRule(ResourceBundle messages, Language language, UserConfig userConfig) {
|
|
--- languagetool-4.8/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java 2019-12-27 11:17:28.000000000 +0100
|
|
+++ languagetool-4.8/languagetool-language-modules/de/src/main/java/org/languagetool/rules/de/GermanSpellerRule.java 2020-01-07 09:32:01.282033523 +0100
|
|
@@ -1132,107 +1132,107 @@
|
|
return Collections.singletonList("Std.");
|
|
} else if (word.matches(".*ibel[hk]eit$")) {
|
|
suggestion = word.replaceFirst("el[hk]eit$", "ilität");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.endsWith("aquise")) {
|
|
suggestion = word.replaceFirst("aquise$", "akquise");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.endsWith("standart")) {
|
|
suggestion = word.replaceFirst("standart$", "standard");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.endsWith("standarts")) {
|
|
suggestion = word.replaceFirst("standarts$", "standards");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.endsWith("tips")) {
|
|
suggestion = word.replaceFirst("tips$", "tipps");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.endsWith("tip")) {
|
|
suggestion = word + "p";
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.endsWith("entfehlung")) {
|
|
suggestion = word.replaceFirst("ent", "emp");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.endsWith("oullie")) {
|
|
suggestion = word.replaceFirst("oullie$", "ouille");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.startsWith("[dD]urschnitt")) {
|
|
suggestion = word.replaceFirst("^urschnitt", "urchschnitt");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.startsWith("Bundstift")) {
|
|
suggestion = word.replaceFirst("^Bundstift", "Buntstift");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.matches("[aA]llmähll?i(g|ch)(e[mnrs]?)?")) {
|
|
suggestion = word.replaceFirst("llmähll?i(g|ch)", "llmählich");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.matches(".*[mM]a[jy]onn?[äe]se.*")) {
|
|
suggestion = word.replaceFirst("a[jy]onn?[äe]se", "ayonnaise");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.matches(".*[rR]es(a|er)[vw]i[he]?rung(en)?")) {
|
|
suggestion = word.replaceFirst("es(a|er)[vw]i[he]?rung", "eservierung");
|
|
- if (hunspell.spell(suggestion)) { // suggest e.g. 'Ticketreservierung', but not 'Blödsinnsquatschreservierung'
|
|
+ if (!hunspellDict.misspelled(suggestion)) { // suggest e.g. 'Ticketreservierung', but not 'Blödsinnsquatschreservierung'
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.matches("[rR]eschaschier.+")) {
|
|
suggestion = word.replaceFirst("schaschier", "cherchier");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.matches(".*[lL]aborants$")) {
|
|
suggestion = word.replaceFirst("ts$", "ten");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.matches("[pP]roff?ess?ion([äe])h?ll?(e[mnrs]?)?")) {
|
|
suggestion = word.replaceFirst("roff?ess?ion([äe])h?l{1,2}", "rofessionell");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.matches("[vV]erstehendniss?(es?)?")) {
|
|
suggestion = word.replaceFirst("[vV]erstehendnis", "Verständnis");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.matches("koregier.+")) {
|
|
suggestion = word.replaceAll("reg", "rrig");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.matches("diagno[sz]ier.*")) {
|
|
suggestion = word.replaceAll("gno[sz]ier", "gnostizier");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.matches(".*eiss.*")) {
|
|
suggestion = word.replaceAll("eiss", "eiß");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.matches(".*uess.*")) {
|
|
suggestion = word.replaceAll("uess", "üß");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.equals("gin")) {
|
|
@@ -1286,17 +1286,17 @@
|
|
return Collections.singletonList("Ladys");
|
|
} else if (word.endsWith("derbies")) {
|
|
suggestion = word.replaceFirst("derbies$", "derbys");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.endsWith("stories")) {
|
|
suggestion = word.replaceFirst("stories$", "storys");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
} else if (word.endsWith("parties")) {
|
|
suggestion = word.replaceFirst("parties$", "partys");
|
|
- if (hunspell.spell(suggestion)) {
|
|
+ if (!hunspellDict.misspelled(suggestion)) {
|
|
return Collections.singletonList(suggestion);
|
|
}
|
|
}
|
|
@@ -1334,8 +1334,8 @@
|
|
return Collections.singletonList("Zynismus");
|
|
} else if (word.matches("Email[a-zäöü]{5,}")) {
|
|
String suffix = word.substring(5);
|
|
- if (!hunspell.spell(suffix)) {
|
|
- List<String> suffixSuggestions = hunspell.suggest(StringTools.uppercaseFirstChar(suffix));
|
|
+ if (hunspellDict.misspelled(suffix)) {
|
|
+ List<String> suffixSuggestions = hunspellDict.suggest(suffix);
|
|
suffix = suffixSuggestions.isEmpty() ? suffix : suffixSuggestions.get(0);
|
|
}
|
|
return Collections.singletonList("E-Mail-"+Character.toUpperCase(suffix.charAt(0))+suffix.substring(1));
|
|
@@ -1352,7 +1352,7 @@
|
|
}
|
|
if (!StringTools.startsWithUppercase(word)) {
|
|
String ucWord = StringTools.uppercaseFirstChar(word);
|
|
- if (!suggestions.contains(ucWord) && hunspell.spell(ucWord) && !ucWord.endsWith(".")) {
|
|
+ if (!suggestions.contains(ucWord) && !hunspellDict.misspelled(ucWord) && !ucWord.endsWith(".")) {
|
|
// Hunspell doesn't always automatically offer the most obvious suggestion for compounds:
|
|
return Collections.singletonList(ucWord);
|
|
}
|
|
@@ -1386,7 +1386,7 @@
|
|
stopAt = words.length-2;
|
|
}
|
|
for (int idx = startAt; idx < stopAt; idx++) {
|
|
- if (!hunspell.spell(words[idx])) {
|
|
+ if (hunspellDict.misspelled(words[idx])) {
|
|
List<String> list = sortSuggestionByQuality(words[idx], super.getSuggestions(words[idx]));
|
|
suggestionLists.add(list);
|
|
} else {
|
|
@@ -1473,7 +1473,7 @@
|
|
private String getParticipleForBaseform(String baseform) throws IOException {
|
|
AnalyzedToken token = new AnalyzedToken(baseform, null, baseform);
|
|
String[] forms = synthesizer.synthesize(token, "VER:PA2:.*", true);
|
|
- if (forms.length > 0 && hunspell.spell(forms[0])) {
|
|
+ if (forms.length > 0 && !hunspellDict.misspelled(forms[0])) {
|
|
return forms[0];
|
|
}
|
|
return null;
|
|
@@ -1498,12 +1498,12 @@
|
|
boolean isCompound = nextWord != null && (compoundTokenizer.tokenize(nextWord).size() > 1 || nextWord.indexOf('-') > 0);
|
|
if (isCompound) {
|
|
word = StringUtils.removeEnd(word, "-");
|
|
- boolean isMisspelled = !hunspell.spell(word); // "Stil- und Grammatikprüfung" or "Stil-, Text- und Grammatikprüfung"
|
|
+ boolean isMisspelled = hunspellDict.misspelled(word); // "Stil- und Grammatikprüfung" or "Stil-, Text- und Grammatikprüfung"
|
|
if (isMisspelled && (super.ignoreWord(word) || wordsToBeIgnoredInCompounds.contains(word))) {
|
|
isMisspelled = false;
|
|
} else if (isMisspelled && word.endsWith("s") && isNeedingFugenS(StringUtils.removeEnd(word, "s"))) {
|
|
// Vertuschungs- und Bespitzelungsmaßnahmen: remove trailing "s" before checking "Vertuschungs" so that the spell checker finds it
|
|
- isMisspelled = !hunspell.spell(StringUtils.removeEnd(word, "s"));
|
|
+ isMisspelled = hunspellDict.misspelled(StringUtils.removeEnd(word, "s"));
|
|
}
|
|
return !isMisspelled;
|
|
}
|
|
@@ -1556,10 +1556,10 @@
|
|
boolean isCandidateForNonHyphenatedCompound = !StringUtils.isAllUpperCase(ignoredWord) && (StringUtils.isAllLowerCase(partialWord) || ignoredWord.endsWith("-"));
|
|
boolean needFugenS = isNeedingFugenS(ignoredWord);
|
|
if (isCandidateForNonHyphenatedCompound && !needFugenS && partialWord.length() > 2) {
|
|
- return hunspell.spell(partialWord) || hunspell.spell(StringUtils.capitalize(partialWord));
|
|
+ return !hunspellDict.misspelled(partialWord) || !hunspellDict.misspelled(StringUtils.capitalize(partialWord));
|
|
} else if (isCandidateForNonHyphenatedCompound && needFugenS && partialWord.length() > 2) {
|
|
partialWord = partialWord.startsWith("s") ? partialWord.substring(1) : partialWord;
|
|
- return hunspell.spell(partialWord) || hunspell.spell(StringUtils.capitalize(partialWord));
|
|
+ return !hunspellDict.misspelled(partialWord) || !hunspellDict.misspelled(StringUtils.capitalize(partialWord));
|
|
}
|
|
return false;
|
|
}
|
|
@@ -1591,7 +1591,7 @@
|
|
|
|
if (hasIgnoredWord) {
|
|
for (String w : toSpellCheck) {
|
|
- if (!hunspell.spell(w)) {
|
|
+ if (hunspellDict.misspelled(w)) {
|
|
return false;
|
|
}
|
|
}
|
|
--- languagetool-4.8/languagetool-wikipedia/src/main/java/org/languagetool/dev/RareWordsFinder.java 2019-12-27 11:17:28.000000000 +0100
|
|
+++ languagetool-4.8/languagetool-wikipedia/src/main/java/org/languagetool/dev/RareWordsFinder.java 2020-01-07 09:32:01.282033523 +0100
|
|
@@ -25,7 +25,6 @@
|
|
import java.io.FileNotFoundException;
|
|
import java.io.IOException;
|
|
import java.nio.charset.CharacterCodingException;
|
|
-import java.nio.file.Paths;
|
|
import java.util.List;
|
|
import java.util.Scanner;
|
|
|
|
@@ -39,10 +38,11 @@
|
|
|
|
private static final String dictInClassPath = "/en/hunspell/en_US.dict";
|
|
|
|
- private final Hunspell hunspell;
|
|
+ private final Hunspell.Dictionary hunspellDict;
|
|
|
|
private RareWordsFinder(String hunspellBase) throws IOException {
|
|
- hunspell = new Hunspell(Paths.get(hunspellBase + ".dic"), Paths.get(hunspellBase + ".aff"));
|
|
+ Hunspell hunspell = Hunspell.getInstance();
|
|
+ hunspellDict = hunspell.getDictionary(hunspellBase);
|
|
}
|
|
|
|
private void run(File input, int minimum) throws FileNotFoundException, CharacterCodingException {
|
|
@@ -60,7 +60,7 @@
|
|
boolean isMisspelled = speller.isMisspelled(word);
|
|
if (!isMisspelled) {
|
|
//List<String> suggestions = speller.getSuggestions(word); // seems to work only for words that are actually misspellings
|
|
- List<String> suggestions = hunspell.suggest(word);
|
|
+ List<String> suggestions = hunspellDict.suggest(word);
|
|
suggestions.remove(word);
|
|
if (suggestionsMightBeUseful(word, suggestions)) {
|
|
System.out.println(word + "\t" + count + " -> " + String.join(", ", suggestions));
|