From 164b056d6ce049e1f5ae34e6781f45bb17cdf3b6 Mon Sep 17 00:00:00 2001
From: Maxim Likhachev
Date: Mon, 24 Feb 2020 16:33:50 +0300
Subject: [PATCH] =?UTF-8?q?=D0=9E=D0=BF=D1=80=D0=B5=D0=B4=D0=B5=D0=BB?=
 =?UTF-8?q?=D0=B5=D0=BD=D0=B8=D0=B5=20=D1=8F=D0=B7=D1=8B=D0=BA=D0=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Note: this copy of the patch had lost its line breaks and indentation. The
leading whitespace of context lines was restored by hand and the index hash
predates the guess_language fixes; check against press-n-post before applying.

 press-n-post | 40 +++++++++++++++++++++++++++++++++++++---
 1 file changed, 37 insertions(+), 3 deletions(-)

diff --git a/press-n-post b/press-n-post
index ec28925..dbcae67 100755
--- a/press-n-post
+++ b/press-n-post
@@ -5,8 +5,6 @@ package require Img
 package require fileutil
 package require platform
 
-# TODO: Определение языка по классу символов, использованных в названии
-
 #----------------------------------------------------------------------
 #
 # Copyright (C) 2019, Maxim Lihachev,
@@ -76,7 +74,7 @@ namespace eval pdf {
 		category: Исследования
 		pages:
 		year:
-		language: RUS
+		language: $::pdf::language
 		filesize: $::pdf::tags(File Size)
 		pdf: $::pdf::file
 		tags:
@@ -131,6 +129,41 @@ namespace eval pdf {
 		}
 	}
 
+	# Guess the document language from the characters used in a title.
+	#
+	# The first character outside the basic set (ASCII letters, digits,
+	# whitespace, punctuation) decides the language. Serbian is also
+	# recognized by any of its Cyrillic-only letters, because the rest of
+	# its Cyrillic alphabet is shared with Russian; Serbian Cyrillic text
+	# without such a letter is therefore reported as RUS.
+	#
+	# Arguments:
+	#   title - text to inspect
+	# Returns:
+	#   DEU, FRA, SRB, RUS or ENG; RUS when no pattern matches (for example
+	#   an empty title), which is the value that used to be hard-coded.
+	proc guess_language {title} {
+		# Characters that carry no language information
+		set base {[a-z0-9[:space:][:punct:]]}
+
+		# "--" stops a title that starts with "-" from being parsed as a
+		# switch option. Patterns are tried in order, so the Serbian test
+		# has to come before the generic Cyrillic one.
+		switch -nocase -regexp -- $title "^${base}*\[äöüß]" {
+			return DEU
+		} "^${base}*\[àâæçéèêëîïôœùûüÿ]" {
+			return FRA
+		} "^${base}*\[čćžđš]|\[ђјљњћџ]" {
+			return SRB
+		} "^${base}*\[а-яё]" {
+			return RUS
+		} "^${base}*\[a-z]" {
+			return ENG
+		} default {
+			return RUS
+		}
+	}
+
 	# Получение информации о PDF-файле
 	proc load {filename} {
 		set file_basename [file rootname [file tail $filename]]
@@ -152,6 +185,7 @@ namespace eval pdf {
 		set ::pdf::file_rootname [file rootname $filename]
 		set ::pdf::title $title
 		set ::pdf::authors [authors_list $authors]
+		set ::pdf::language [guess_language $file_basename]
 		set ::pdf::canonical_authors [canonical_authors $authors]
 
 		read_tags