|
|
|
@ -22,41 +22,57 @@ declare -A URLS=(
@@ -22,41 +22,57 @@ declare -A URLS=(
|
|
|
|
|
["ru"]="https://vandrouki.ru" |
|
|
|
|
) |
|
|
|
|
|
|
|
|
|
# ANSI colour escape sequences, stored with a literal backslash so they are
# only turned into real escape bytes by a consumer that interprets "\033"
# (echo -e, printf format strings / %b, or awk string literals).
rstc="\033[00m"      # reset all attributes
yellow="\033[1;33m"  # bold yellow
blue="\033[1;34m"    # bold blue
green="\033[1;32m"   # bold green
|
|
|
|
|
|
|
|
|
# get_info URL
#
# Fetch URL, extract the article headings and their "published" spans from
# the page, and print one coloured entry per article followed by a
# separator line.
#
# Arguments:
#   $1 - page URL to download.
# Output (stdout):
#   ">> URL" header, then for every article
#   " # <date>\t<field 3>\n\t\t<field 2>", then a dashed separator.
#   Fields 2 and 3 are presumably the article link and title (they are the
#   text following href=" and the closing of the <a ...> tag) - confirm
#   against the live markup.
# Returns:
#   1 (with a message on stderr) when no URL is given; otherwise the exit
#   status of the final awk stage of the pipeline.
# Requires: curl, xmllint, awk.
get_info() {
    local url="${1:-}"

    if [[ -z "$url" ]]; then
        echo "get_info: missing URL argument" >&2
        return 1
    fi

    # printf keeps backslashes inside the URL literal (echo -e would
    # interpret them) and emits the reset sequence without relying on any
    # shell-level colour variable.
    printf '>> %s \033[00m\n\n' "$url"

    # -q is only honoured by curl when it is the very first argument.
    # sed drops the <html ... xmlns ...> line before the HTML parser sees it.
    # xmllint diagnostics are discarded via /dev/null rather than by closing
    # fd 2, which would make every write to stderr fail.
    curl -q -s "$url" \
        | sed 's/<html.*xmlns.*$//g' \
        | xmllint --html --xpath "//html/body/div[@id='content']/div[@id='primary']/div/*[self::div/span[@class='published'] or self::h2/a]" - 2>/dev/null \
        | awk '
        BEGIN {
            RS = "";            # paragraph mode: blank lines separate records
            text = "";

            rstc   = "\033[00m";
            yellow = "\033[1;33m";
            blue   = "\033[1;34m";
            green  = "\033[1;32m";
        }

        # Turn one chunk of HTML into "@"-separated fields, one article per
        # line (each <h2 class="entry-title"> starts a new line).
        function tokenize(html) {
            gsub(/\n+/, "@", html);
            gsub(/<h2 class="entry-title">/, "\n", html);
            gsub(/(href=")|(" rel=[^>]*>)|(published.>)/, "@", html);
            gsub(/(&#[0-9]+;)|(<[^@>]*>*)/, "", html);
            # POSIX class instead of the gawk-only \s escape.
            gsub(/@[[:space:]]*@*/, "@", html);
            gsub(/\r/, "", html);

            return html;
        }

        {
            text = text "\n" tokenize($0);
        }

        END {
            count = split(text, lines, "\n");

            # Index loop: "for (k in lines)" visits keys in unspecified
            # order and could print the articles out of page order.
            for (i = 1; i <= count; i++) {
                split(lines[i], article, "@");

                # The date lands in field 4, or in field 5 when field 4 is
                # blank.
                if (article[4] ~ /^[[:space:]]*$/) {
                    date = article[5];
                } else {
                    date = article[4];
                }

                # Skip lines that carry no third field (for example the
                # leading empty line of text).
                if (article[3] !~ /^[[:space:]]*$/) {
                    print blue " # " date "\t" yellow article[3] "\n\t\t" green article[2] rstc;
                }
            }

            print "\n--------------------------------\n";
        }'
}
|
|
|
|
|
|
|
|
|
domains="${*:-.}" |
|
|
|
|