A script to retrieve a list of the most required skills for given vacancies from the website hh.ru.
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

159 lines
6.4 KiB

{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE DeriveGeneric #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE Strict #-}
--
-- Copyright (C) 2019, Maxim Lihachev, <envrm@yandex.ru>
--
-- This program is free software: you can redistribute it and/or modify it
-- under the terms of the GNU General Public License as published by the Free
-- Software Foundation, version 3.
--
-- This program is distributed in the hope that it will be useful, but WITHOUT
-- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
-- more details.
--
-- You should have received a copy of the GNU General Public License along with
-- this program. If not, see <https://www.gnu.org/licenses/>.
--
--
-- | Список наиболее востребованных навыков с сайта hh.ru
--
module Main (main) where
import Relude ((+), (-), (.), (++), ($), (=<<),
Int, String, Generic, Text, Show, IO,
show, return, quot, replicate, flip, compare,
on, snd, putStrLn, mapM_)
import Options.Applicative ((<$>), (<*>), (<**>), info, execParser, helper,
header, fullDesc, option, auto, long, short,
argument, metavar, showDefault, value, help, some, str)
import Control.Lens ((^.))
import Data.Semigroup ((<>))
import Data.List.Unique (count)
import Data.Aeson (FromJSON)
import Text.Printf (printf)
import Foreign.Marshal.Unsafe (unsafeLocalState)
import Control.Parallel.Strategies (parMap, rdeepseq)
import Network.Wreq (get, asJSON, responseBody)
import qualified Data.List as L
import qualified Data.Text as T
-- ------------------------------------------------------------------------------------
data Settings = Settings{ totalPages :: Int
, topSkills :: Int
, searchQuery :: [String] }
data Pages = Pages { pages :: Int , found :: Int } deriving (Generic)
newtype Vacancies = Vacancies { items :: [Vacancy] } deriving (Generic)
newtype Vacancy = Vacancy { id :: String } deriving (Generic)
newtype Skills = Skills { key_skills :: [Skill] } deriving (Generic)
newtype Skill = Skill { name :: Text } deriving (Generic)
type RatedSkill = (Text, Int)
instance FromJSON Pages
instance FromJSON Vacancies
instance FromJSON Vacancy
instance FromJSON Skills
instance FromJSON Skill
-- ------------------------------------------------------------------------------------
-- | Адрес HH API
hhAPI :: String
hhAPI = "https://api.hh.ru/vacancies"
-- | Запрос к странице hh.ru: https://api.hh.ru/vacancies?text=<query>&page=<number>
hhPage :: (FromJSON b, Show a) => String -> a -> IO b
hhPage query page = do
r <- asJSON =<< get (hhAPI ++ "?text='" ++ query ++ "'&page=" ++ show page)
return $ r ^. responseBody
-- | Список вакансий
getVacancies :: String -> Int -> [Vacancy]
getVacancies query n = items $ unsafeLocalState $ hhPage query n
-- | Список навыков
getSkills :: String -> IO [Text]
getSkills vacancy = do
r <- asJSON =<< get (hhAPI ++ "/" ++ vacancy)
return $ L.map name $ key_skills (r ^. responseBody)
--
-- | Список навыков из всех вакансий со страницы hh.ru
--
-- * Навыки, перечисленные через запятую, разделяются на отдельные слова.
-- * Все навыки приводятся в нижнем регистре.
--
-- [
-- "Linux",
-- "Terraform, Packer",
-- "Ansible"
-- ]
--
-- =>
--
-- [ "linux", "terraform", "packer", "ansible" ]
--
collectSkillsFromPage :: [Vacancy] -> [Text]
collectSkillsFromPage vids = L.concatMap unify allSkills
where allSkills = L.concatMap (unsafeLocalState . getSkills . Main.id) vids
unify skills = L.map (T.toLower . T.strip) $ T.splitOn "," skills
--
-- | Вывод полосы визуализации количества вхождений навыка
--
-- >>> bar 1 ("Linux", 167)
-- "1. [▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ ] 167: linux"
--
bar :: Int -> RatedSkill -> String
bar i (s, n) = printf "%3d. [%-50s ] %3d: %s" i (replicate x '▒') n s
where x = quot n 10 + 1 -- Отображение как минимум 1 деления шкалы
-- | Список уникальных навыков, отсортированых по количеству вхождений
sorted :: [Text] -> [RatedSkill]
sorted = L.sortBy (flip compare `on` snd) . count
-- | N наиболее востребованных навыков
top :: Int -> [Text] -> [RatedSkill]
top n l = L.take n $ sorted l
-- | Поиск навыков по конкретному запросу
getTopSkills :: Settings -> IO ()
getTopSkills (Settings getPages showLines searchQuery) =
let query = L.unwords searchQuery in
do p <- hhPage query 0
putStrLn $ printf "# По запросу '%s' найдено вакансий: %d\n" query (found p)
let pagesCount = L.minimum [pages p, getPages - 1] in
mapM_ (\(n, l) -> putStrLn $ bar n l)
$ L.zip [1..]
$ top showLines
$ L.concat
$ parMap rdeepseq (collectSkillsFromPage . getVacancies query) [0..pagesCount]
-- ------------------------------------------------------------------------------------
main :: IO ()
main = getTopSkills =<< execParser opts
where
opts = info (args <**> helper)
(fullDesc <> header "hhskills - поиск наиболее востребованных навыков по вакансиям hh.ru.")
args = Settings <$> option auto (long "pages" <> short 'p' <> metavar "PAGES"
<> showDefault <> value 10 <> help "Количество страниц для анализа")
<*> option auto (long "top" <> short 't' <> metavar "TOP"
<> showDefault <> value 20 <> help "Количество выводимых навыков")
<*> some (argument str (metavar "QUERY..."))