commit bfcc2e41cc81a68491a197dd170840486c95201e04ec229003b7cc686b26bb26
Author: Adrian Schröter <adrian@suse.de>
Date:   Fri May 3 11:42:16 2024 +0200

    Sync from SUSE:SLFO:Main cmuclmtk revision f6ef2c45190a721df251ae6a90e85aeb

diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..9b03811
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,23 @@
+## Default LFS
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.bsp filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.gem filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.jar filter=lfs diff=lfs merge=lfs -text
+*.lz filter=lfs diff=lfs merge=lfs -text
+*.lzma filter=lfs diff=lfs merge=lfs -text
+*.obscpio filter=lfs diff=lfs merge=lfs -text
+*.oxt filter=lfs diff=lfs merge=lfs -text
+*.pdf filter=lfs diff=lfs merge=lfs -text
+*.png filter=lfs diff=lfs merge=lfs -text
+*.rpm filter=lfs diff=lfs merge=lfs -text
+*.tbz filter=lfs diff=lfs merge=lfs -text
+*.tbz2 filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.ttf filter=lfs diff=lfs merge=lfs -text
+*.txz filter=lfs diff=lfs merge=lfs -text
+*.whl filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
diff --git a/0002-Fix-includes.patch b/0002-Fix-includes.patch
new file mode 100644
index 0000000..97a69b7
--- /dev/null
+++ b/0002-Fix-includes.patch
@@ -0,0 +1,85 @@
+--- src/liblmest/generate.c
++++ src/liblmest/generate.c
+@@ -99,8 +99,8 @@
+   int i,j,bo_case,initial_history_id;
+   id__t sought_trigram[3];
+   double p,acc,trigram_prob;
+-  vocab_sz_t lm_vocab_sz;
+-  char** lm_vocab;
++  vocab_sz_t lm_vocab_sz = 0;
++  char** lm_vocab = NULL;
+ 
+   if(png!=NULL && pang!=NULL)
+     quit(-1,"Confused by multiple input type.\n");
+--- src/libs/rr_mkdtemp.c
++++ src/libs/rr_mkdtemp.c
+@@ -36,6 +36,8 @@
+ 
+ #include <stdio.h>
+ #include <stdlib.h>
++#include <sys/types.h>
++#include <sys/stat.h>
+ 
+ #include <../win32/compat.h>
+ 
+--- src/programs/text2idngram.c
++++ src/programs/text2idngram.c
+@@ -51,7 +51,7 @@
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+-#include <sys/types.h>
++#include <unistd.h>
+ #include <errno.h>
+ 
+ #include "../liblmest/toolkit.h"
+--- src/programs/text2wngram.c
++++ src/programs/text2wngram.c
+@@ -36,10 +36,10 @@
+ 
+ #define DEFAULT_MAX_FILES 20
+ 
+-#include <sys/types.h>
+ #include <stdio.h>
+ #include <string.h>
+ #include <stdlib.h>
++#include <unistd.h>
+ #include <errno.h>
+ 
+ #include "../liblmest/toolkit.h"
+@@ -70,6 +70,15 @@
+     fprintf(stderr,"                    < .text > .wngram\n");
+ }
+ 
++void merge_tempfiles (int start_file, 
++                      int end_file, 
++                      char *temp_file_root,
++                      char *temp_file_ext,
++                      int max_files,
++                      FILE *outfile,
++                      int n,
++                      int verbosity);
++
+ int main (int argc, char **argv) {
+ 
+   int n;
+--- src/programs/wngram2idngram.c
++++ src/programs/wngram2idngram.c
+@@ -45,7 +45,7 @@
+ #include <stdio.h>
+ #include <stdlib.h>
+ #include <string.h>
+-#include <sys/types.h>
++#include <unistd.h>
+ #include <errno.h>
+ 
+ #include "../liblmest/toolkit.h"
+@@ -361,7 +361,7 @@
+       }else {
+ 	/* Write to temporary file */
+ 	for (i=0;i<=n-1;i++) 
+-	  rr_fwrite((char*)&current_ngram[i],sizeof(unsigned short),1,
++	  rr_fwrite((char*)&current_ngram[i],sizeof(wordid_t),1,
+ 		    non_unk_fp,"temporary n-gram ids");
+ 
+ 	rr_fwrite((char*)&current_count,sizeof(int),1,non_unk_fp,
diff --git a/0003-Fix-endian-check.patch b/0003-Fix-endian-check.patch
new file mode 100644
index 0000000..f1a09e4
--- /dev/null
+++ b/0003-Fix-endian-check.patch
@@ -0,0 +1,13 @@
+--- src/libs/mips_swap.h
++++ src/libs/mips_swap.h
+@@ -20,8 +20,8 @@
+ 
+ #include "general.h"
+ 
+-#ifndef WORDS_BIGENDIAN    /* reverse byteorder */
+-
++#if __BYTE_ORDER == __LITTLE_ENDIAN
++/* reverse byteorder */
+ /* the following works even for badly aligned pointers */
+ 
+ #define SWAPFIELD(x) {if     (sizeof(*(x))==sizeof(short)) {SWAPHALF((x))}  \
diff --git a/0004-Fix-vocab_size.patch b/0004-Fix-vocab_size.patch
new file mode 100644
index 0000000..6be620b
--- /dev/null
+++ b/0004-Fix-vocab_size.patch
@@ -0,0 +1,39 @@
+--- src/liblmest/load_lm.c
++++ src/liblmest/load_lm.c
+@@ -84,6 +84,7 @@
+ 	     char *lm_filename) {
+ 
+   int i;
++  wordid_t vocab_size;
+   ng->disc_meth=NULL;
+   ng->vocab_size = 0;
+   ng->bin_fp = rr_iopen(lm_filename);
+@@ -98,7 +99,8 @@
+   /* Scalar parameters */
+ 
+   rr_fread((char*)&ng->n,sizeof(unsigned short),1,ng->bin_fp,"n",0);
+-  rr_fread((char*)&ng->vocab_size,sizeof(wordid_t),1,ng->bin_fp,"vocab_size",0);
++  rr_fread((char*)&vocab_size,sizeof(wordid_t),1,ng->bin_fp,"vocab_size",0);
++  ng->vocab_size = (vocab_sz_t) vocab_size;
+   rr_fread((char*)&ng->no_of_ccs,sizeof(unsigned short),1,ng->bin_fp,"no_of_ccs",0);
+   rr_fread((char*)&ng->vocab_type,sizeof(unsigned short),1,ng->bin_fp,"vocab_type",0);
+ 
+--- src/liblmest/write_lms.c
++++ src/liblmest/write_lms.c
+@@ -432,6 +432,7 @@
+   int l_chunk;
+   int from_rec;
+   int i;
++  wordid_t vocab_size = (wordid_t)ng->vocab_size;
+ 
+   pc_message(verbosity,1,"Binary %d-gram language model will be written to %s\n",ng->n,ng->bin_filename);
+   
+@@ -442,7 +443,7 @@
+   rr_fwrite((char*)&ng->version,sizeof(int),1,ng->bin_fp,"version");
+   rr_fwrite((char*)&ng->n,sizeof(unsigned short),1,ng->bin_fp,"n");
+ 
+-  rr_fwrite((char*)&ng->vocab_size,sizeof(wordid_t),1,ng->bin_fp,"vocab_size");
++  rr_fwrite((char*)&vocab_size,sizeof(wordid_t),1,ng->bin_fp,"vocab_size");
+   rr_fwrite((char*)&ng->no_of_ccs,sizeof(unsigned short),1,ng->bin_fp,"no_of_ccs");
+   rr_fwrite((char*)&ng->vocab_type,sizeof(unsigned short),1,ng->bin_fp,"vocab_type");
+ 
diff --git a/cmuclmtk-0.7.tar.gz b/cmuclmtk-0.7.tar.gz
new file mode 100644
index 0000000..db3aa73
--- /dev/null
+++ b/cmuclmtk-0.7.tar.gz
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d23e47f00224667c059d69ac942f15dc3d4c3dd40e827318a6213699b7fa2915
+size 9123964
diff --git a/cmuclmtk.changes b/cmuclmtk.changes
new file mode 100644
index 0000000..c020591
--- /dev/null
+++ b/cmuclmtk.changes
@@ -0,0 +1,39 @@
+-------------------------------------------------------------------
+Thu Feb  8 16:37:48 UTC 2018 - stefan.bruens@rwth-aachen.de
+
+- Remove BuildRequires: gdb and vim. As the latter is built with
+  gvim support, this adds all of gtk and rust (librsvg) to the
+  build dependency chain. Via presage -> fcitx this also creates a
+  huge build dependency loop.
+
+-------------------------------------------------------------------
+Tue Mar 11 01:26:19 CET 2014 - ro@suse.de
+
+- add patch 0002-Fix-includes.patch from upstream 
+- add patch 0003-Fix-endian-check.patch 
+- add patch 0004-Fix-vocab_size.patch
+- run testsuite (but ignore errors for now, completes only
+  on little-endian 64bit)
+
+-------------------------------------------------------------------
+Sat Mar  1 20:03:46 UTC 2014 - schwab@suse.de
+
+- decl-mismatch.patch: fix mismatching declaration
+
+-------------------------------------------------------------------
+Tue Sep 18 12:55:46 UTC 2012 - i@marguerite.su
+
+- add cmuclmtk requires to libcmuclmtk-devel.
+
+-------------------------------------------------------------------
+Mon Jul 30 20:45:13 UTC 2012 - i@marguerite.su
+
+- finished license investigation (bnc#773622), see
+  * https://bugzilla.novell.com/show_bug.cgi?id=773622
+
+-------------------------------------------------------------------
+Mon Jul 23 13:17:47 UTC 2012 - i@marguerite.su
+
+- initial version 0.7
+  * see NEWS for detailed changelog.
+
diff --git a/cmuclmtk.spec b/cmuclmtk.spec
new file mode 100644
index 0000000..2b50a69
--- /dev/null
+++ b/cmuclmtk.spec
@@ -0,0 +1,124 @@
+#
+# spec file for package cmuclmtk
+#
+# Copyright (c) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany.
+#
+# All modifications and additions to the file contributed by third parties
+# remain the property of their copyright owners, unless otherwise agreed
+# upon. The license for this file, and modifications and additions to the
+# file, is the same license as for the pristine package itself (unless the
+# license for the pristine package is not an Open Source License, in which
+# case the license is the MIT License). An "Open Source License" is a
+# license that conforms to the Open Source Definition (Version 1.9)
+# published by the Open Source Initiative.
+
+# Please submit bugfixes or comments via http://bugs.opensuse.org/
+#
+
+
+Name:           cmuclmtk
+Version:        0.7
+Release:        0
+Summary:        CMU-Cambridge Statistical Language Modeling toolkit
+License:        AFL-2.1 and BSD-3-Clause
+Group:          System/Libraries
+Url:            http://cmusphinx.sourceforge.net
+Source:         %{name}-%{version}.tar.gz
+Patch1:         decl-mismatch.patch
+Patch2:         0002-Fix-includes.patch
+Patch3:         0003-Fix-endian-check.patch
+Patch4:         0004-Fix-vocab_size.patch
+BuildRoot:      %{_tmppath}/%{name}-%{version}-build
+BuildRequires:  gcc
+BuildRequires:  gmake
+BuildRequires:  gawk
+
+%description
+The CMU-Cambridge Language Modeling Toolkit is a free set of tools
+for constructing and testing statistical N-Gram language models.
+These models have various applications including speech recognition,
+machine translation, optical character and handwriting recognition.
+
+This package contains the front-end tools for easy language model
+training as well as the basic tools for manipulating N-Gram and text files.
+
+%package -n libcmuclmtk0
+Summary:        CMU-Cambridge Statistical Language Modeling toolkit
+Group:          System/Libraries
+
+%description -n libcmuclmtk0
+The CMU-Cambridge Language Modeling Toolkit is a free set of tools
+for constructing and testing statistical N-Gram language models.
+These models have various applications including speech recognition,
+machine translation, optical character and handwriting recognition.
+
+This package contains the shared library used by the CMU-Cambridge
+Language Model Toolkit.
+
+%package -n libcmuclmtk-devel
+Summary:        CMU-Cambridge Statistical Language Modeling toolkit
+Group:          Development/Libraries/C and C++
+Requires:       %{name} = %{version}
+Requires:       libcmuclmtk0 = %{version}
+
+%description -n libcmuclmtk-devel
+The CMU-Cambridge Language Modeling Toolkit is a free set of tools
+for constructing and testing statistical N-Gram language models.
+These models have various applications including speech recognition,
+machine translation, optical character and handwriting recognition.
+
+This package contains the include files and libraries used to compile
+programs using the CMU-Cambridge Language Model Toolkit.
+
+%prep
+%setup -q
+%patch1 -p1
+%patch2
+%patch3
+%patch4
+
+%build
+%configure
+make %{_smp_mflags}
+
+%install
+make DESTDIR=%{buildroot} install
+
+rm -rf %{buildroot}%{_libdir}/*.a
+rm -rf %{buildroot}%{_libdir}/*.la
+
+%check
+make check || true
+
+%post -n libcmuclmtk0 -p /sbin/ldconfig
+
+%postun -n libcmuclmtk0 -p /sbin/ldconfig
+
+%files
+%defattr(-,root,root)
+%doc AUTHORS README NEWS COPYING TODO
+%{_bindir}/binlm2arpa
+%{_bindir}/evallm
+%{_bindir}/idngram2lm
+%{_bindir}/idngram2stats
+%{_bindir}/lm_combine
+%{_bindir}/lm_interpolate
+%{_bindir}/mergeidngram
+%{_bindir}/ngram2mgram
+%{_bindir}/text2idngram
+%{_bindir}/text2wfreq
+%{_bindir}/text2wngram
+%{_bindir}/wfreq2vocab
+%{_bindir}/wngram2idngram
+
+%files -n libcmuclmtk0
+%defattr(-,root,root)
+%{_libdir}/libcmuclmtk.so.0
+%{_libdir}/libcmuclmtk.so.0.0.0
+
+%files -n libcmuclmtk-devel
+%defattr(-,root,root)
+%{_includedir}/cmuclmtk/
+%{_libdir}/libcmuclmtk.so
+
+%changelog
diff --git a/decl-mismatch.patch b/decl-mismatch.patch
new file mode 100644
index 0000000..77e71d9
--- /dev/null
+++ b/decl-mismatch.patch
@@ -0,0 +1,20 @@
+Index: cmuclmtk-0.7/src/libs/rd_wlist_arry.c
+===================================================================
+--- cmuclmtk-0.7.orig/src/libs/rd_wlist_arry.c
++++ cmuclmtk-0.7/src/libs/rd_wlist_arry.c
+@@ -41,13 +41,14 @@
+ #include <stdio.h>
+ #include <string.h>
+ #include "general.h"
++#include "sih.h"
+ #include "ac_parsetext.h"
+ 
+ /* allocate an lagre enough array and read in a list of words (first word on each line)
+    Leave entry no. 0 empty.
+ */
+ 
+-void read_wlist_into_array(char *wlist_filename, int verbosity,  char ***p_wlist, int *p_n_wlist)
++void read_wlist_into_array(char *wlist_filename, int verbosity,  char ***p_wlist, vocab_sz_t *p_n_wlist)
+ {
+   static char rname[]="read_wlist_into_array";
+   FILE   *wlist_fp = rr_iopen(wlist_filename);