tesseract: Allow specifying a subset of languages.
Especially useful for our OCR-based VM tests, where we only need the English language. By default the argument is null, so all languages are included. If a list of language names is passed, only those languages are enabled; for example, tesseract.override { enableLanguages = [ "eng" "spa" ]; }; enables support for only the English and Spanish languages. Signed-off-by: aszlig <aszlig@redmoonstudios.org>
This commit is contained in:
parent
8be00dc71d
commit
adb7581459
1 changed files with 23 additions and 18 deletions
|
@ -1,26 +1,31 @@
|
|||
{ stdenv, fetchurl, autoconf, automake, libtool, leptonica, libpng, libtiff }:
|
||||
{ stdenv, fetchurl, autoconf, automake, libtool, leptonica, libpng, libtiff
|
||||
, enableLanguages ? null
|
||||
}:
|
||||
|
||||
with stdenv.lib;
|
||||
|
||||
let
|
||||
majVersion = "3.02";
|
||||
version = "${majVersion}.02";
|
||||
|
||||
f = lang : sha256 : let
|
||||
src = fetchurl {
|
||||
url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${majVersion}.${lang}.tar.gz";
|
||||
inherit sha256;
|
||||
};
|
||||
in
|
||||
"tar xfvz ${src} -C $out/share/ --strip=1";
|
||||
mkLang = lang: sha256: let
|
||||
src = fetchurl {
|
||||
url = "http://tesseract-ocr.googlecode.com/files/tesseract-ocr-${majVersion}.${lang}.tar.gz";
|
||||
inherit sha256;
|
||||
};
|
||||
in "tar xfvz ${src} -C $out/share/ --strip=1";
|
||||
|
||||
extraLanguages = ''
|
||||
${f "cat" "0d1smiv1b3k9ay2s05sl7q08mb3ln4w5iiiymv2cs8g8333z8jl9"}
|
||||
${f "rus" "059336mkhsj9m3hwfb818xjlxkcdpy7wfgr62qwz65cx914xl709"}
|
||||
${f "spa" "1c9iza5mbahd9pa7znnq8yv09v5kz3gbd2sarcgcgc1ps1jc437l"}
|
||||
${f "nld" "162acxp1yb6gyki2is3ay2msalmfcsnrlsd9wml2ja05k94m6bjy"}
|
||||
${f "eng" "1y5xf794n832s3lymzlsdm2s9nlrd2v27jjjp0fd9xp7c2ah4461"}
|
||||
${f "slv" "0rqng43435cly32idxm1lvxkcippvc3xpxbfizwq5j0155ym00dr"}
|
||||
${f "jpn" "07v8pymd0iwyzh946lxylybda20gsw7p4fsb09jw147955x49gq9"}
|
||||
'';
|
||||
wantLang = name: const (enableLanguages == null || elem name enableLanguages);
|
||||
|
||||
extraLanguages = mapAttrsToList mkLang (filterAttrs wantLang {
|
||||
cat = "0d1smiv1b3k9ay2s05sl7q08mb3ln4w5iiiymv2cs8g8333z8jl9";
|
||||
rus = "059336mkhsj9m3hwfb818xjlxkcdpy7wfgr62qwz65cx914xl709";
|
||||
spa = "1c9iza5mbahd9pa7znnq8yv09v5kz3gbd2sarcgcgc1ps1jc437l";
|
||||
nld = "162acxp1yb6gyki2is3ay2msalmfcsnrlsd9wml2ja05k94m6bjy";
|
||||
eng = "1y5xf794n832s3lymzlsdm2s9nlrd2v27jjjp0fd9xp7c2ah4461";
|
||||
slv = "0rqng43435cly32idxm1lvxkcippvc3xpxbfizwq5j0155ym00dr";
|
||||
jpn = "07v8pymd0iwyzh946lxylybda20gsw7p4fsb09jw147955x49gq9";
|
||||
});
|
||||
in
|
||||
|
||||
stdenv.mkDerivation rec {
|
||||
|
@ -40,7 +45,7 @@ stdenv.mkDerivation rec {
|
|||
'LIBLEPT_HEADERSDIR=${leptonica}/include'
|
||||
'';
|
||||
|
||||
postInstall = extraLanguages;
|
||||
postInstall = concatStrings extraLanguages;
|
||||
|
||||
meta = {
|
||||
description = "OCR engine";
|
||||
|
|
Loading…
Reference in a new issue