You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
159 lines
6.4 KiB
159 lines
6.4 KiB
{-# LANGUAGE NoImplicitPrelude #-} |
|
{-# LANGUAGE DeriveGeneric #-} |
|
{-# LANGUAGE OverloadedStrings #-} |
|
{-# LANGUAGE ScopedTypeVariables #-} |
|
{-# LANGUAGE Strict #-} |
|
|
|
-- |
|
-- Copyright (C) 2019, Maxim Lihachev, <envrm@yandex.ru> |
|
-- |
|
-- This program is free software: you can redistribute it and/or modify it |
|
-- under the terms of the GNU General Public License as published by the Free |
|
-- Software Foundation, version 3. |
|
-- |
|
-- This program is distributed in the hope that it will be useful, but WITHOUT |
|
-- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
|
-- FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
|
-- more details. |
|
-- |
|
-- You should have received a copy of the GNU General Public License along with |
|
-- this program. If not, see <https://www.gnu.org/licenses/>. |
|
-- |
|
|
|
-- |
|
-- | Список наиболее востребованных навыков с сайта hh.ru |
|
-- |
|
module Main (main) where |
|
|
|
import Relude ((+), (-), (.), (++), ($), (=<<), |
|
Int, String, Generic, Text, Show, IO, |
|
show, return, quot, replicate, flip, compare, |
|
on, snd, putStrLn, mapM_) |
|
|
|
import Options.Applicative ((<$>), (<*>), (<**>), info, execParser, helper, |
|
header, fullDesc, option, auto, long, short, |
|
argument, metavar, showDefault, value, help, some, str) |
|
|
|
import Control.Lens ((^.)) |
|
import Data.Semigroup ((<>)) |
|
import Data.List.Unique (count) |
|
import Data.Aeson (FromJSON) |
|
import Text.Printf (printf) |
|
import Foreign.Marshal.Unsafe (unsafeLocalState) |
|
import Control.Parallel.Strategies (parMap, rdeepseq) |
|
import Network.Wreq (get, asJSON, responseBody) |
|
|
|
import qualified Data.List as L |
|
import qualified Data.Text as T |
|
|
|
-- ------------------------------------------------------------------------------------ |
|
|
|
data Settings = Settings{ totalPages :: Int |
|
, topSkills :: Int |
|
, searchQuery :: [String] } |
|
|
|
data Pages = Pages { pages :: Int , found :: Int } deriving (Generic) |
|
|
|
newtype Vacancies = Vacancies { items :: [Vacancy] } deriving (Generic) |
|
newtype Vacancy = Vacancy { id :: String } deriving (Generic) |
|
newtype Skills = Skills { key_skills :: [Skill] } deriving (Generic) |
|
newtype Skill = Skill { name :: Text } deriving (Generic) |
|
|
|
type RatedSkill = (Text, Int) |
|
|
|
instance FromJSON Pages |
|
instance FromJSON Vacancies |
|
instance FromJSON Vacancy |
|
instance FromJSON Skills |
|
instance FromJSON Skill |
|
|
|
-- ------------------------------------------------------------------------------------ |
|
|
|
-- | Адрес HH API |
|
hhAPI :: String |
|
hhAPI = "https://api.hh.ru/vacancies" |
|
|
|
-- | Запрос к странице hh.ru: https://api.hh.ru/vacancies?text=<query>&page=<number> |
|
hhPage :: (FromJSON b, Show a) => String -> a -> IO b |
|
hhPage query page = do |
|
r <- asJSON =<< get (hhAPI ++ "?text='" ++ query ++ "'&page=" ++ show page) |
|
return $ r ^. responseBody |
|
|
|
-- | Список вакансий |
|
getVacancies :: String -> Int -> [Vacancy] |
|
getVacancies query n = items $ unsafeLocalState $ hhPage query n |
|
|
|
-- | Список навыков |
|
getSkills :: String -> IO [Text] |
|
getSkills vacancy = do |
|
r <- asJSON =<< get (hhAPI ++ "/" ++ vacancy) |
|
return $ L.map name $ key_skills (r ^. responseBody) |
|
|
|
-- |
|
-- | Список навыков из всех вакансий со страницы hh.ru |
|
-- |
|
-- * Навыки, перечисленные через запятую, разделяются на отдельные слова. |
|
-- * Все навыки приводятся в нижнем регистре. |
|
-- |
|
-- [ |
|
-- "Linux", |
|
-- "Terraform, Packer", |
|
-- "Ansible" |
|
-- ] |
|
-- |
|
-- => |
|
-- |
|
-- [ "linux", "terraform", "packer", "ansible" ] |
|
-- |
|
collectSkillsFromPage :: [Vacancy] -> [Text] |
|
collectSkillsFromPage vids = L.concatMap unify allSkills |
|
where allSkills = L.concatMap (unsafeLocalState . getSkills . Main.id) vids |
|
unify skills = L.map (T.toLower . T.strip) $ T.splitOn "," skills |
|
|
|
-- |
|
-- | Вывод полосы визуализации количества вхождений навыка |
|
-- |
|
-- >>> bar 1 ("Linux", 167) |
|
-- "1. [▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒▒ ] 167: linux" |
|
-- |
|
bar :: Int -> RatedSkill -> String |
|
bar i (s, n) = printf "%3d. [%-50s ] %3d: %s" i (replicate x '▒') n s |
|
where x = quot n 10 + 1 -- Отображение как минимум 1 деления шкалы |
|
|
|
-- | Список уникальных навыков, отсортированых по количеству вхождений |
|
sorted :: [Text] -> [RatedSkill] |
|
sorted = L.sortBy (flip compare `on` snd) . count |
|
|
|
-- | N наиболее востребованных навыков |
|
top :: Int -> [Text] -> [RatedSkill] |
|
top n l = L.take n $ sorted l |
|
|
|
-- | Поиск навыков по конкретному запросу |
|
getTopSkills :: Settings -> IO () |
|
getTopSkills (Settings getPages showLines searchQuery) = |
|
let query = L.unwords searchQuery in |
|
do p <- hhPage query 0 |
|
|
|
putStrLn $ printf "# По запросу '%s' найдено вакансий: %d\n" query (found p) |
|
|
|
let pagesCount = L.minimum [pages p, getPages - 1] in |
|
mapM_ (\(n, l) -> putStrLn $ bar n l) |
|
$ L.zip [1..] |
|
$ top showLines |
|
$ L.concat |
|
$ parMap rdeepseq (collectSkillsFromPage . getVacancies query) [0..pagesCount] |
|
|
|
-- ------------------------------------------------------------------------------------ |
|
|
|
main :: IO () |
|
main = getTopSkills =<< execParser opts |
|
where |
|
opts = info (args <**> helper) |
|
(fullDesc <> header "hhskills - поиск наиболее востребованных навыков по вакансиям hh.ru.") |
|
|
|
args = Settings <$> option auto (long "pages" <> short 'p' <> metavar "PAGES" |
|
<> showDefault <> value 10 <> help "Количество страниц для анализа") |
|
<*> option auto (long "top" <> short 't' <> metavar "TOP" |
|
<> showDefault <> value 20 <> help "Количество выводимых навыков") |
|
<*> some (argument str (metavar "QUERY...")) |
|
|
|
|