Sync from SUSE:SLFO:Main cmuclmtk revision f6ef2c45190a721df251ae6a90e85aeb

This commit is contained in:
Adrian Schröter 2024-05-03 11:42:16 +02:00
commit bfcc2e41cc
8 changed files with 346 additions and 0 deletions

23
.gitattributes vendored Normal file
View File

@ -0,0 +1,23 @@
## Default LFS
*.7z filter=lfs diff=lfs merge=lfs -text
*.bsp filter=lfs diff=lfs merge=lfs -text
*.bz2 filter=lfs diff=lfs merge=lfs -text
*.gem filter=lfs diff=lfs merge=lfs -text
*.gz filter=lfs diff=lfs merge=lfs -text
*.jar filter=lfs diff=lfs merge=lfs -text
*.lz filter=lfs diff=lfs merge=lfs -text
*.lzma filter=lfs diff=lfs merge=lfs -text
*.obscpio filter=lfs diff=lfs merge=lfs -text
*.oxt filter=lfs diff=lfs merge=lfs -text
*.pdf filter=lfs diff=lfs merge=lfs -text
*.png filter=lfs diff=lfs merge=lfs -text
*.rpm filter=lfs diff=lfs merge=lfs -text
*.tbz filter=lfs diff=lfs merge=lfs -text
*.tbz2 filter=lfs diff=lfs merge=lfs -text
*.tgz filter=lfs diff=lfs merge=lfs -text
*.ttf filter=lfs diff=lfs merge=lfs -text
*.txz filter=lfs diff=lfs merge=lfs -text
*.whl filter=lfs diff=lfs merge=lfs -text
*.xz filter=lfs diff=lfs merge=lfs -text
*.zip filter=lfs diff=lfs merge=lfs -text
*.zst filter=lfs diff=lfs merge=lfs -text

85
0002-Fix-includes.patch Normal file
View File

@ -0,0 +1,85 @@
--- src/liblmest/generate.c
+++ src/liblmest/generate.c
@@ -99,8 +99,8 @@
int i,j,bo_case,initial_history_id;
id__t sought_trigram[3];
double p,acc,trigram_prob;
- vocab_sz_t lm_vocab_sz;
- char** lm_vocab;
+ vocab_sz_t lm_vocab_sz = 0;
+ char** lm_vocab = NULL;
if(png!=NULL && pang!=NULL)
quit(-1,"Confused by multiple input type.\n");
--- src/libs/rr_mkdtemp.c
+++ src/libs/rr_mkdtemp.c
@@ -36,6 +36,8 @@
#include <stdio.h>
#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
#include <../win32/compat.h>
--- src/programs/text2idngram.c
+++ src/programs/text2idngram.c
@@ -51,7 +51,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/types.h>
+#include <unistd.h>
#include <errno.h>
#include "../liblmest/toolkit.h"
--- src/programs/text2wngram.c
+++ src/programs/text2wngram.c
@@ -36,10 +36,10 @@
#define DEFAULT_MAX_FILES 20
-#include <sys/types.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
+#include <unistd.h>
#include <errno.h>
#include "../liblmest/toolkit.h"
@@ -70,6 +70,15 @@
fprintf(stderr," < .text > .wngram\n");
}
+void merge_tempfiles (int start_file,
+ int end_file,
+ char *temp_file_root,
+ char *temp_file_ext,
+ int max_files,
+ FILE *outfile,
+ int n,
+ int verbosity);
+
int main (int argc, char **argv) {
int n;
--- src/programs/wngram2idngram.c
+++ src/programs/wngram2idngram.c
@@ -45,7 +45,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/types.h>
+#include <unistd.h>
#include <errno.h>
#include "../liblmest/toolkit.h"
@@ -361,7 +361,7 @@
}else {
/* Write to temporary file */
for (i=0;i<=n-1;i++)
- rr_fwrite((char*)&current_ngram[i],sizeof(unsigned short),1,
+ rr_fwrite((char*)&current_ngram[i],sizeof(wordid_t),1,
non_unk_fp,"temporary n-gram ids");
rr_fwrite((char*)&current_count,sizeof(int),1,non_unk_fp,

View File

@ -0,0 +1,13 @@
--- src/libs/mips_swap.h
+++ src/libs/mips_swap.h
@@ -20,8 +20,8 @@
#include "general.h"
-#ifndef WORDS_BIGENDIAN /* reverse byteorder */
-
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+/* reverse byteorder */
/* the following works even for badly aligned pointers */
#define SWAPFIELD(x) {if (sizeof(*(x))==sizeof(short)) {SWAPHALF((x))} \

39
0004-Fix-vocab_size.patch Normal file
View File

@ -0,0 +1,39 @@
--- src/liblmest/load_lm.c
+++ src/liblmest/load_lm.c
@@ -84,6 +84,7 @@
char *lm_filename) {
int i;
+ wordid_t vocab_size;
ng->disc_meth=NULL;
ng->vocab_size = 0;
ng->bin_fp = rr_iopen(lm_filename);
@@ -98,7 +99,8 @@
/* Scalar parameters */
rr_fread((char*)&ng->n,sizeof(unsigned short),1,ng->bin_fp,"n",0);
- rr_fread((char*)&ng->vocab_size,sizeof(wordid_t),1,ng->bin_fp,"vocab_size",0);
+ rr_fread((char*)&vocab_size,sizeof(wordid_t),1,ng->bin_fp,"vocab_size",0);
+ ng->vocab_size = (vocab_sz_t) vocab_size;
rr_fread((char*)&ng->no_of_ccs,sizeof(unsigned short),1,ng->bin_fp,"no_of_ccs",0);
rr_fread((char*)&ng->vocab_type,sizeof(unsigned short),1,ng->bin_fp,"vocab_type",0);
--- src/liblmest/write_lms.c
+++ src/liblmest/write_lms.c
@@ -432,6 +432,7 @@
int l_chunk;
int from_rec;
int i;
+ wordid_t vocab_size = (wordid_t)ng->vocab_size;
pc_message(verbosity,1,"Binary %d-gram language model will be written to %s\n",ng->n,ng->bin_filename);
@@ -442,7 +443,7 @@
rr_fwrite((char*)&ng->version,sizeof(int),1,ng->bin_fp,"version");
rr_fwrite((char*)&ng->n,sizeof(unsigned short),1,ng->bin_fp,"n");
- rr_fwrite((char*)&ng->vocab_size,sizeof(wordid_t),1,ng->bin_fp,"vocab_size");
+ rr_fwrite((char*)&vocab_size,sizeof(wordid_t),1,ng->bin_fp,"vocab_size");
rr_fwrite((char*)&ng->no_of_ccs,sizeof(unsigned short),1,ng->bin_fp,"no_of_ccs");
rr_fwrite((char*)&ng->vocab_type,sizeof(unsigned short),1,ng->bin_fp,"vocab_type");

BIN
cmuclmtk-0.7.tar.gz (Stored with Git LFS) Normal file

Binary file not shown.

39
cmuclmtk.changes Normal file
View File

@ -0,0 +1,39 @@
-------------------------------------------------------------------
Thu Feb 8 16:37:48 UTC 2018 - stefan.bruens@rwth-aachen.de
- Remove BuildRequires: gdb and vim. As the latter is built with
gvim support, this adds all of gtk and rust (librsvg) to the
build dependency chain. Via presage -> fcitx this also creates a
huge build dependency loop.
-------------------------------------------------------------------
Tue Mar 11 01:26:19 CET 2014 - ro@suse.de
- add patch 0002-Fix-includes.patch from upstream
- add patch 0003-Fix-endian-check.patch
- add patch 0004-Fix-vocab_size.patch
- run testsuite (but ignore errors for now, completes only
on little-endian 64bit)
-------------------------------------------------------------------
Sat Mar 1 20:03:46 UTC 2014 - schwab@suse.de
- decl-mismatch.patch: fix mismatching declaration
-------------------------------------------------------------------
Tue Sep 18 12:55:46 UTC 2012 - i@marguerite.su
- add cmuclmtk requires to libcmuclmtk-devel.
-------------------------------------------------------------------
Mon Jul 30 20:45:13 UTC 2012 - i@marguerite.su
- finished license investigation (#bnc773622) under
* https://bugzilla.novell.com/show_bug.cgi?id=773622
-------------------------------------------------------------------
Mon Jul 23 13:17:47 UTC 2012 - i@marguerite.su
- initial version 0.7
* see NEWS for the detailed changelog.

124
cmuclmtk.spec Normal file
View File

@ -0,0 +1,124 @@
#
# spec file for package cmuclmtk
#
# Copyright (c) 2014 SUSE LINUX Products GmbH, Nuernberg, Germany.
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
# upon. The license for this file, and modifications and additions to the
# file, is the same license as for the pristine package itself (unless the
# license for the pristine package is not an Open Source License, in which
# case the license is the MIT License). An "Open Source License" is a
# license that conforms to the Open Source Definition (Version 1.9)
# published by the Open Source Initiative.
# Please submit bugfixes or comments via http://bugs.opensuse.org/
#
Name: cmuclmtk
Version: 0.7
Release: 0
Summary: CMU-Cambridge Statistical Language Modeling toolkit
License: AFL-2.1 and BSD-3-Clause
Group: System/Libraries
Url: http://cmusphinx.sourceforge.net
Source: %{name}-%{version}.tar.gz
# Changes the p_n_wlist parameter of read_wlist_into_array from int* to vocab_sz_t*
# and includes sih.h in rd_wlist_arry.c (changelog: fix mismatching declaration).
Patch1: decl-mismatch.patch
# Adjusts system header includes, adds a merge_tempfiles prototype in text2wngram.c,
# initializes lm_vocab_sz and lm_vocab in generate.c, and writes temporary n-gram ids
# with sizeof(wordid_t) in wngram2idngram.c.
Patch2: 0002-Fix-includes.patch
# Replaces the WORDS_BIGENDIAN test in mips_swap.h with a __BYTE_ORDER comparison.
Patch3: 0003-Fix-endian-check.patch
# Reads and writes the binary LM vocab_size field through a wordid_t temporary
# instead of directly through ng->vocab_size.
Patch4: 0004-Fix-vocab_size.patch
BuildRoot: %{_tmppath}/%{name}-%{version}-build
BuildRequires: gcc
BuildRequires: gmake
BuildRequires: gawk
%description
The CMU-Cambridge Language Modeling Toolkit is a free set of tools
for constructing and testing statistical N-Gram language models.
These models have various applications including speech recognition,
machine translation, optical character and handwriting recognition.
This package contains the front-end tools for easy language model
training as well as the basic tools for manipulating N-Gram and text files.
%package -n libcmuclmtk0
Summary: CMU-Cambridge Statistical Language Modeling toolkit
Group: System/Libraries
%description -n libcmuclmtk0
The CMU-Cambridge Language Modeling Toolkit is a free set of tools
for constructing and testing statistical N-Gram language models.
These models have various applications including speech recognition,
machine translation, optical character and handwriting recognition.
This package contains the shared library used by the CMU-Cambridge
Language Model Toolkit.
%package -n libcmuclmtk-devel
Summary: CMU-Cambridge Statistical Language Modeling toolkit
Group: Development/Libraries/C and C++
Requires: %{name} = %{version}
Requires: libcmuclmtk0 = %{version}
%description -n libcmuclmtk-devel
The CMU-Cambridge Language Modeling Toolkit is a free set of tools
for constructing and testing statistical N-Gram language models.
These models have various applications including speech recognition,
machine translation, optical character and handwriting recognition.
This package contains the include files and libraries used to compile
programs using the CMU-Cambridge Language Model Toolkit.
%prep
%setup -q
# decl-mismatch.patch uses paths prefixed with the source directory
# (cmuclmtk-0.7/src/...), so one leading path component is stripped.
%patch -P 1 -p1
# The remaining patches use paths relative to the source root (src/...),
# so they are applied without stripping any path component.
%patch -P 2 -p0
%patch -P 3 -p0
%patch -P 4 -p0
%build
# Configure through the distribution's configure macro, then run make with
# the parallel-jobs flags supplied by the build environment.
%configure
make %{_smp_mflags}
%install
# Install into the build root rather than the live filesystem.
make DESTDIR=%{buildroot} install
# Remove static archives and libtool .la files from the library directory;
# the file lists in this spec only package the shared library.
rm -rf %{buildroot}%{_libdir}/*.a
rm -rf %{buildroot}%{_libdir}/*.la
%check
# Run the test suite but never fail the build on its result: per the package
# changelog, errors are ignored for now because the suite completes only on
# little-endian 64-bit systems.
make check || true
%post -n libcmuclmtk0 -p /sbin/ldconfig
%postun -n libcmuclmtk0 -p /sbin/ldconfig
%files
%defattr(-,root,root)
%doc AUTHORS README NEWS COPYING TODO
%{_bindir}/binlm2arpa
%{_bindir}/evallm
%{_bindir}/idngram2lm
%{_bindir}/idngram2stats
%{_bindir}/lm_combine
%{_bindir}/lm_interpolate
%{_bindir}/mergeidngram
%{_bindir}/ngram2mgram
%{_bindir}/text2idngram
%{_bindir}/text2wfreq
%{_bindir}/text2wngram
%{_bindir}/wfreq2vocab
%{_bindir}/wngram2idngram
%files -n libcmuclmtk0
%defattr(-,root,root)
%{_libdir}/libcmuclmtk.so.0
%{_libdir}/libcmuclmtk.so.0.0.0
%files -n libcmuclmtk-devel
%defattr(-,root,root)
%{_includedir}/cmuclmtk/
%{_libdir}/libcmuclmtk.so
%changelog

20
decl-mismatch.patch Normal file
View File

@ -0,0 +1,20 @@
Index: cmuclmtk-0.7/src/libs/rd_wlist_arry.c
===================================================================
--- cmuclmtk-0.7.orig/src/libs/rd_wlist_arry.c
+++ cmuclmtk-0.7/src/libs/rd_wlist_arry.c
@@ -41,13 +41,14 @@
#include <stdio.h>
#include <string.h>
#include "general.h"
+#include "sih.h"
#include "ac_parsetext.h"
/* allocate an lagre enough array and read in a list of words (first word on each line)
Leave entry no. 0 empty.
*/
-void read_wlist_into_array(char *wlist_filename, int verbosity, char ***p_wlist, int *p_n_wlist)
+void read_wlist_into_array(char *wlist_filename, int verbosity, char ***p_wlist, vocab_sz_t *p_n_wlist)
{
static char rname[]="read_wlist_into_array";
FILE *wlist_fp = rr_iopen(wlist_filename);