From bfcc2e41cc81a68491a197dd170840486c95201e04ec229003b7cc686b26bb26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Adrian=20Schr=C3=B6ter?= Date: Fri, 3 May 2024 11:42:16 +0200 Subject: [PATCH] Sync from SUSE:SLFO:Main cmuclmtk revision f6ef2c45190a721df251ae6a90e85aeb --- .gitattributes | 23 +++++++ 0002-Fix-includes.patch | 85 ++++++++++++++++++++++++ 0003-Fix-endian-check.patch | 13 ++++ 0004-Fix-vocab_size.patch | 39 ++++++++++++ cmuclmtk-0.7.tar.gz | 3 + cmuclmtk.changes | 39 ++++++++++++ cmuclmtk.spec | 124 ++++++++++++++++++++++++++++++++++++ decl-mismatch.patch | 20 ++++++ 8 files changed, 346 insertions(+) create mode 100644 .gitattributes create mode 100644 0002-Fix-includes.patch create mode 100644 0003-Fix-endian-check.patch create mode 100644 0004-Fix-vocab_size.patch create mode 100644 cmuclmtk-0.7.tar.gz create mode 100644 cmuclmtk.changes create mode 100644 cmuclmtk.spec create mode 100644 decl-mismatch.patch diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..9b03811 --- /dev/null +++ b/.gitattributes @@ -0,0 +1,23 @@ +## Default LFS +*.7z filter=lfs diff=lfs merge=lfs -text +*.bsp filter=lfs diff=lfs merge=lfs -text +*.bz2 filter=lfs diff=lfs merge=lfs -text +*.gem filter=lfs diff=lfs merge=lfs -text +*.gz filter=lfs diff=lfs merge=lfs -text +*.jar filter=lfs diff=lfs merge=lfs -text +*.lz filter=lfs diff=lfs merge=lfs -text +*.lzma filter=lfs diff=lfs merge=lfs -text +*.obscpio filter=lfs diff=lfs merge=lfs -text +*.oxt filter=lfs diff=lfs merge=lfs -text +*.pdf filter=lfs diff=lfs merge=lfs -text +*.png filter=lfs diff=lfs merge=lfs -text +*.rpm filter=lfs diff=lfs merge=lfs -text +*.tbz filter=lfs diff=lfs merge=lfs -text +*.tbz2 filter=lfs diff=lfs merge=lfs -text +*.tgz filter=lfs diff=lfs merge=lfs -text +*.ttf filter=lfs diff=lfs merge=lfs -text +*.txz filter=lfs diff=lfs merge=lfs -text +*.whl filter=lfs diff=lfs merge=lfs -text +*.xz filter=lfs diff=lfs merge=lfs -text +*.zip filter=lfs diff=lfs merge=lfs -text 
+*.zst filter=lfs diff=lfs merge=lfs -text diff --git a/0002-Fix-includes.patch b/0002-Fix-includes.patch new file mode 100644 index 0000000..97a69b7 --- /dev/null +++ b/0002-Fix-includes.patch @@ -0,0 +1,85 @@ +--- src/liblmest/generate.c ++++ src/liblmest/generate.c +@@ -99,8 +99,8 @@ + int i,j,bo_case,initial_history_id; + id__t sought_trigram[3]; + double p,acc,trigram_prob; +- vocab_sz_t lm_vocab_sz; +- char** lm_vocab; ++ vocab_sz_t lm_vocab_sz = 0; ++ char** lm_vocab = NULL; + + if(png!=NULL && pang!=NULL) + quit(-1,"Confused by multiple input type.\n"); +--- src/libs/rr_mkdtemp.c ++++ src/libs/rr_mkdtemp.c +@@ -36,6 +36,8 @@ + + #include + #include ++#include ++#include + + #include <../win32/compat.h> + +--- src/programs/text2idngram.c ++++ src/programs/text2idngram.c +@@ -51,7 +51,7 @@ + #include + #include + #include +-#include ++#include + #include + + #include "../liblmest/toolkit.h" +--- src/programs/text2wngram.c ++++ src/programs/text2wngram.c +@@ -36,10 +36,10 @@ + + #define DEFAULT_MAX_FILES 20 + +-#include + #include + #include + #include ++#include + #include + + #include "../liblmest/toolkit.h" +@@ -70,6 +70,15 @@ + fprintf(stderr," < .text > .wngram\n"); + } + ++void merge_tempfiles (int start_file, ++ int end_file, ++ char *temp_file_root, ++ char *temp_file_ext, ++ int max_files, ++ FILE *outfile, ++ int n, ++ int verbosity); ++ + int main (int argc, char **argv) { + + int n; +--- src/programs/wngram2idngram.c ++++ src/programs/wngram2idngram.c +@@ -45,7 +45,7 @@ + #include + #include + #include +-#include ++#include + #include + + #include "../liblmest/toolkit.h" +@@ -361,7 +361,7 @@ + }else { + /* Write to temporary file */ + for (i=0;i<=n-1;i++) +- rr_fwrite((char*)&current_ngram[i],sizeof(unsigned short),1, ++ rr_fwrite((char*)&current_ngram[i],sizeof(wordid_t),1, + non_unk_fp,"temporary n-gram ids"); + + rr_fwrite((char*)&current_count,sizeof(int),1,non_unk_fp, diff --git a/0003-Fix-endian-check.patch b/0003-Fix-endian-check.patch new file mode 100644 
index 0000000..f1a09e4 --- /dev/null +++ b/0003-Fix-endian-check.patch @@ -0,0 +1,13 @@ +--- src/libs/mips_swap.h ++++ src/libs/mips_swap.h +@@ -20,8 +20,8 @@ + + #include "general.h" + +-#ifndef WORDS_BIGENDIAN /* reverse byteorder */ +- ++#if __BYTE_ORDER == __LITTLE_ENDIAN ++/* reverse byteorder */ + /* the following works even for badly aligned pointers */ + + #define SWAPFIELD(x) {if (sizeof(*(x))==sizeof(short)) {SWAPHALF((x))} \ diff --git a/0004-Fix-vocab_size.patch b/0004-Fix-vocab_size.patch new file mode 100644 index 0000000..6be620b --- /dev/null +++ b/0004-Fix-vocab_size.patch @@ -0,0 +1,39 @@ +--- src/liblmest/load_lm.c ++++ src/liblmest/load_lm.c +@@ -84,6 +84,7 @@ + char *lm_filename) { + + int i; ++ wordid_t vocab_size; + ng->disc_meth=NULL; + ng->vocab_size = 0; + ng->bin_fp = rr_iopen(lm_filename); +@@ -98,7 +99,8 @@ + /* Scalar parameters */ + + rr_fread((char*)&ng->n,sizeof(unsigned short),1,ng->bin_fp,"n",0); +- rr_fread((char*)&ng->vocab_size,sizeof(wordid_t),1,ng->bin_fp,"vocab_size",0); ++ rr_fread((char*)&vocab_size,sizeof(wordid_t),1,ng->bin_fp,"vocab_size",0); ++ ng->vocab_size = (vocab_sz_t) vocab_size; + rr_fread((char*)&ng->no_of_ccs,sizeof(unsigned short),1,ng->bin_fp,"no_of_ccs",0); + rr_fread((char*)&ng->vocab_type,sizeof(unsigned short),1,ng->bin_fp,"vocab_type",0); + +--- src/liblmest/write_lms.c ++++ src/liblmest/write_lms.c +@@ -432,6 +432,7 @@ + int l_chunk; + int from_rec; + int i; ++ wordid_t vocab_size = (wordid_t)ng->vocab_size; + + pc_message(verbosity,1,"Binary %d-gram language model will be written to %s\n",ng->n,ng->bin_filename); + +@@ -442,7 +443,7 @@ + rr_fwrite((char*)&ng->version,sizeof(int),1,ng->bin_fp,"version"); + rr_fwrite((char*)&ng->n,sizeof(unsigned short),1,ng->bin_fp,"n"); + +- rr_fwrite((char*)&ng->vocab_size,sizeof(wordid_t),1,ng->bin_fp,"vocab_size"); ++ rr_fwrite((char*)&vocab_size,sizeof(wordid_t),1,ng->bin_fp,"vocab_size"); + rr_fwrite((char*)&ng->no_of_ccs,sizeof(unsigned 
short),1,ng->bin_fp,"no_of_ccs"); + rr_fwrite((char*)&ng->vocab_type,sizeof(unsigned short),1,ng->bin_fp,"vocab_type"); + diff --git a/cmuclmtk-0.7.tar.gz b/cmuclmtk-0.7.tar.gz new file mode 100644 index 0000000..db3aa73 --- /dev/null +++ b/cmuclmtk-0.7.tar.gz @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d23e47f00224667c059d69ac942f15dc3d4c3dd40e827318a6213699b7fa2915 +size 9123964 diff --git a/cmuclmtk.changes b/cmuclmtk.changes new file mode 100644 index 0000000..c020591 --- /dev/null +++ b/cmuclmtk.changes @@ -0,0 +1,39 @@ +------------------------------------------------------------------- +Thu Feb 8 16:37:48 UTC 2018 - stefan.bruens@rwth-aachen.de + +- Remove Buildrequires: gdb and vim. As the latter is build with + gvim support, this adds all of gtk and rust (librsvg) to the + build dependency chain. Via presage -> fcitx this also creates a + huge build dependency loop. + +------------------------------------------------------------------- +Tue Mar 11 01:26:19 CET 2014 - ro@suse.de + +- add patch 0002-Fix-includes.patch from upstream +- add patch 0003-Fix-endian-check.patch +- add patch 0004-Fix-vocab_size.patch +- run testsuite (but ignore errors for now, completes only + on little-endian 64bit) + +------------------------------------------------------------------- +Sat Mar 1 20:03:46 UTC 2014 - schwab@suse.de + +- decl-mismatch.patch: fix mismatching declaration + +------------------------------------------------------------------- +Tue Sep 18 12:55:46 UTC 2012 - i@marguerite.su + +- add cmuclmtk requires to libcmuclmtk-devel. + +------------------------------------------------------------------- +Mon Jul 30 20:45:13 UTC 2012 - i@marguerite.su + +- finished license investigation (#bnc773622) under + * https://bugzilla.novell.com/show_bug.cgi?id=773622 + +------------------------------------------------------------------- +Mon Jul 23 13:17:47 UTC 2012 - i@marguerite.su + +- initial version 0.7 + * see NEWS for detail changelog. 
+ diff --git a/cmuclmtk.spec b/cmuclmtk.spec new file mode 100644 index 0000000..2b50a69 --- /dev/null +++ b/cmuclmtk.spec @@ -0,0 +1,124 @@ +# +# spec file for package cmuclmtk +# +# Copyright (c) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany. +# +# All modifications and additions to the file contributed by third parties +# remain the property of their copyright owners, unless otherwise agreed +# upon. The license for this file, and modifications and additions to the +# file, is the same license as for the pristine package itself (unless the +# license for the pristine package is not an Open Source License, in which +# case the license is the MIT License). An "Open Source License" is a +# license that conforms to the Open Source Definition (Version 1.9) +# published by the Open Source Initiative. + +# Please submit bugfixes or comments via http://bugs.opensuse.org/ +# + + +Name: cmuclmtk +Version: 0.7 +Release: 0 +Summary: CMU-Cambridge Statistical Language Modeling toolkit +License: AFL-2.1 and BSD-3-Clause +Group: System/Libraries +Url: http://cmusphinx.sourceforge.net +Source: %{name}-%{version}.tar.gz +Patch1: decl-mismatch.patch +Patch2: 0002-Fix-includes.patch +Patch3: 0003-Fix-endian-check.patch +Patch4: 0004-Fix-vocab_size.patch +BuildRoot: %{_tmppath}/%{name}-%{version}-build +BuildRequires: gcc +BuildRequires: gmake +BuildRequires: gawk + +%description +The CMU-Cambridge Language Modeling Toolkit is a free set of tools +for constructing and testing statistical N-Gram language models. +These models have various applications including speech recognition, +machine translation, optical character and handwriting recognition. + +This package contains the front-end tools for easy language model +training as well as the basic tools for manipulating N-Gram and text files. 
+ +%package -n libcmuclmtk0 +Summary: CMU-Cambridge Statistical Language Modeling toolkit +Group: System/Libraries + +%description -n libcmuclmtk0 +The CMU-Cambridge Language Modeling Toolkit is a free set of tools +for constructing and testing statistical N-Gram language models. +These models have various applications including speech recognition, +machine translation, optical character and handwriting recognition. + +This package contains the shared library used by the CMU-Cambridge +Language Model Toolkit. + +%package -n libcmuclmtk-devel +Summary: CMU-Cambridge Statistical Language Modeling toolkit +Group: Development/Libraries/C and C++ +Requires: %{name} = %{version} +Requires: libcmuclmtk0 = %{version} + +%description -n libcmuclmtk-devel +The CMU-Cambridge Language Modeling Toolkit is a free set of tools +for constructing and testing statistical N-Gram language models. +These models have various applications including speech recognition, +machine translation, optical character and handwriting recognition. + +This package contains the include files and libraries used to compile +programs using the CMU-Cambridge Language Model Toolkit. 
+ +%prep +%setup -q +%patch1 -p1 +%patch2 +%patch3 +%patch4 + +%build +%configure +make %{_smp_mflags} + +%install +make DESTDIR=%{buildroot} install + +rm -rf %{buildroot}%{_libdir}/*.a +rm -rf %{buildroot}%{_libdir}/*.la + +%check +make check || true + +%post -n libcmuclmtk0 -p /sbin/ldconfig + +%postun -n libcmuclmtk0 -p /sbin/ldconfig + +%files +%defattr(-,root,root) +%doc AUTHORS README NEWS COPYING TODO +%{_bindir}/binlm2arpa +%{_bindir}/evallm +%{_bindir}/idngram2lm +%{_bindir}/idngram2stats +%{_bindir}/lm_combine +%{_bindir}/lm_interpolate +%{_bindir}/mergeidngram +%{_bindir}/ngram2mgram +%{_bindir}/text2idngram +%{_bindir}/text2wfreq +%{_bindir}/text2wngram +%{_bindir}/wfreq2vocab +%{_bindir}/wngram2idngram + +%files -n libcmuclmtk0 +%defattr(-,root,root) +%{_libdir}/libcmuclmtk.so.0 +%{_libdir}/libcmuclmtk.so.0.0.0 + +%files -n libcmuclmtk-devel +%defattr(-,root,root) +%{_includedir}/cmuclmtk/ +%{_libdir}/libcmuclmtk.so + +%changelog diff --git a/decl-mismatch.patch b/decl-mismatch.patch new file mode 100644 index 0000000..77e71d9 --- /dev/null +++ b/decl-mismatch.patch @@ -0,0 +1,20 @@ +Index: cmuclmtk-0.7/src/libs/rd_wlist_arry.c +=================================================================== +--- cmuclmtk-0.7.orig/src/libs/rd_wlist_arry.c ++++ cmuclmtk-0.7/src/libs/rd_wlist_arry.c +@@ -41,13 +41,14 @@ + #include + #include + #include "general.h" ++#include "sih.h" + #include "ac_parsetext.h" + + /* allocate an lagre enough array and read in a list of words (first word on each line) + Leave entry no. 0 empty. + */ + +-void read_wlist_into_array(char *wlist_filename, int verbosity, char ***p_wlist, int *p_n_wlist) ++void read_wlist_into_array(char *wlist_filename, int verbosity, char ***p_wlist, vocab_sz_t *p_n_wlist) + { + static char rname[]="read_wlist_into_array"; + FILE *wlist_fp = rr_iopen(wlist_filename);