# HG changeset patch # User Hans-Günter Theisgen # Date 1658840247 -3600 # Node ID ffe30fb08ada6e3ccf650017cb79c6ac535c6298 # Parent 61cc62188020e02890bad4695395c190647774a0 updated tesseract and tesseract-dev (3.02.02 -> 5.2.0) diff -r 61cc62188020 -r ffe30fb08ada tesseract-ocr-dev/receipt --- a/tesseract-ocr-dev/receipt Mon Jul 25 14:25:43 2022 +0100 +++ b/tesseract-ocr-dev/receipt Tue Jul 26 13:57:27 2022 +0100 @@ -1,21 +1,18 @@ # SliTaz package receipt. PACKAGE="tesseract-ocr-dev" -VERSION="3.02.02" +VERSION="5.2.0" CATEGORY="development" SHORT_DESC="The most accurate open source OCR engine available, development files." MAINTAINER="pascal.bellard@slitaz.org" LICENSE="Apache" WEB_SITE="https://github.com/tesseract-ocr/tesseract" + +DEPENDS="pkg-config tesseract-ocr" WANTED="tesseract-ocr" -DEPENDS="tesseract-ocr pkg-config" - # Rules to gen a SliTaz package suitable for Tazpkg. genpkg_rules() { - mkdir -p $fs/usr/lib - cp -a $install/usr/include $fs/usr - cp -a $install/usr/lib/*a $fs/usr/lib - cp -a $install/usr/lib/pkgconfig $fs/usr/lib + get_dev_files } diff -r 61cc62188020 -r ffe30fb08ada tesseract-ocr/description.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tesseract-ocr/description.txt Tue Jul 26 13:57:27 2022 +0100 @@ -0,0 +1,28 @@ +This package contains an OCR engine - libtesseract and +a command line program - tesseract. + +Tesseract 4 adds a new neural net (LSTM) based OCR engine +which is focused on line recognition, but also still +supports the legacy Tesseract OCR engine of Tesseract 3 +which works by recognizing character patterns. +Compatibility with Tesseract 3 is enabled by using the +Legacy OCR Engine mode (--oem 0). +It also needs traineddata files which support the legacy +engine, for example those from the tessdata repository. + +The lead developer is Ray Smith. The maintainer is Zdenko +Podobny. +For a list of contributors see AUTHORS and GitHub's log +of contributors. + +Tesseract has unicode (UTF-8) support, and can recognize +more than 100 languages "out of the box". + +Tesseract supports various output formats: plain text, +hOCR (HTML), PDF, invisible-text-only PDF, TSV. +The main branch also has experimental support for ALTO +(XML) output. + +You should note that in many cases, in order to get better +OCR results, you'll need to improve the quality of the +image you are giving Tesseract. diff -r 61cc62188020 -r ffe30fb08ada tesseract-ocr/receipt --- a/tesseract-ocr/receipt Mon Jul 25 14:25:43 2022 +0100 +++ b/tesseract-ocr/receipt Tue Jul 26 13:57:27 2022 +0100 @@ -1,18 +1,19 @@ # SliTaz package receipt. PACKAGE="tesseract-ocr" -VERSION="3.02.02" +VERSION="5.2.0" CATEGORY="office" SHORT_DESC="The most accurate open source OCR engine available." MAINTAINER="pascal.bellard@slitaz.org" LICENSE="Apache" WEB_SITE="https://github.com/tesseract-ocr/tesseract" + TARBALL="$PACKAGE-$VERSION.tar.gz" -WGET_URL="https://github.com/tesseract-ocr/tesseract/archive/refs/tags/$VERSION.tar.gz" +WGET_URL="$WEB_SITE/archive/$VERSION.tar.gz" -DEPENDS="leptonica libpng jpeg tiff giflib" -BUILD_DEPENDS="libtool autoconf automake libpng-dev jpeg-dev tiff-dev \ -giflib-dev zlib-dev icu-dev pango-dev cairo-dev leptonica-dev" +DEPENDS="gcc83-lib-base giflib jpeg leptonica libpng tiff" +BUILD_DEPENDS="autoconf automake cairo-dev gcc83 giflib-dev icu-dev jpeg-dev + leptonica-dev libpng-dev libtool pango-dev tiff-dev zlib-dev" # What is the latest version available today? current_version() @@ -24,19 +25,24 @@ # Rules to configure and make the package. compile_rules() { - ./autogen.sh - ./configure \ - --prefix=/usr \ + # 5.2.0 avoid undefined symbol: GOMP_parallel at runtime + # by disable-openmp + + ./autogen.sh && + ./configure \ + CC=gcc-83 \ + CXX=g++-83 \ + --prefix=/usr \ + --disable-openmp \ $CONFIGURE_ARGS && - make && make install + make && + make install } # Rules to gen a SliTaz package suitable for Tazpkg. genpkg_rules() { - mkdir -p $fs/usr/lib - cp -a $install/usr/bin $fs/usr - cp -a $install/usr/share $fs/usr - rm -rf $fs/usr/share/man - cp -a $install/usr/lib/*.so* $fs/usr/lib + cook_copy_folders bin + cook_copy_folders tessdata + cook_copy_files *.so* } \ No newline at end of file