# Sources ----
# Urban Dictionary: https://www.kaggle.com/therohk/urban-dictionary-words-dataset/
# Wiki popularity: https://figshare.com/articles/WikiRank_05_2019_-_quality_scores_popularity_and_AI_for_Wikipedia_articles/8231273/2

# Unix ----
# sort
# uniq
# NOTE(review): presumably the shell tools used to prepare the input file
# before it is loaded below -- confirm the exact commands.

# R ----

# Path of the tab-separated input table.
file <- "small"

# The table has a header row and tab-separated fields. quote = "" disables
# quote processing so quote characters inside fields are read literally.
wiki <- read.table(file, header = TRUE, sep = "\t", quote = "")

# sort
# Rows ordered by ascending and by descending popularity.
wikisort.bot <- wiki[order(wiki$popularity), ]
wikisort.top <- wiki[order(wiki$popularity, decreasing = TRUE), ]

# First rows of each ordering (least popular, then most popular).
head(wikisort.bot)
head(wikisort.top)

# Scatter plot of authors_interest against popularity.
plot(wiki$popularity, wiki$authors_interest)

# TODO linear regression
# hexbin ??

# outliers
# Rows with popularity above 1700 but authors_interest below 10. which()
# keeps only positions where the condition is TRUE, so rows where it
# evaluates to NA are dropped instead of showing up as all-NA rows.
wiki[which(wiki$popularity > 1700 & wiki$authors_interest < 10), ]