- commit
- 976be77
- parent
- c06c969
- author
- Eric Bower
- date
- 2021-07-19 16:17:26 +0000 UTC
updating etl processes
6 files changed,
+177,
-100
+1,
-0
1@@ -11,6 +11,7 @@
2 "lint": "prettier --check --plugin-search-dir=. . && eslint --ignore-path .gitignore .",
3 "format": "prettier --write --plugin-search-dir=. .",
4 "prepare": "husky install",
5+ "scrape": "node --loader ts-node/esm src/scrape.ts",
6 "process": "node --loader ts-node/esm src/process.ts",
7 "transform": "node --loader ts-node/esm src/transform.ts",
8 "upload:clean": "gsutil -m rm -r gs://neovim.erock.io/*",
+32,
-0
1@@ -0,0 +1,32 @@
2+import type { Plugin, Resource } from './types';
3+
/**
 * Builds a complete `Plugin` record.
 *
 * Every field starts from an empty default (empty strings, an empty `tags`
 * array, zeroed counters, and `branch` set to `'main'`); any key present in
 * `p` then replaces the corresponding default.
 *
 * @param p - Partial plugin data used to override the defaults.
 * @returns A new `Plugin` object.
 */
export const createPlugin = (p: Partial<Plugin> = {}): Plugin => ({
  // Identity and location of the plugin.
  id: '',
  name: '',
  username: '',
  repo: '',
  link: '',
  tags: [],
  homepage: '',
  description: '',
  branch: 'main',
  // Repository counters.
  openIssues: 0,
  watchers: 0,
  forks: 0,
  stars: 0,
  subscribers: 0,
  network: 0,
  // Caller-supplied values win over everything above.
  ...p,
});
24+
/**
 * Builds a complete `Resource` record.
 *
 * Starts from a GitHub resource with empty `username`, `repo` and `tags`,
 * then applies whatever keys are present in `p` on top.
 *
 * @param p - Partial resource data used to override the defaults.
 * @returns A new `Resource` object.
 */
export function createResource(p: Partial<Resource> = {}): Resource {
  const defaults: Resource = {
    type: 'github',
    username: '',
    repo: '',
    tags: [],
  };

  return { ...defaults, ...p };
}
+1,
-0
1@@ -0,0 +1 @@
2+{ "resources": [] }
+9,
-0
1@@ -24,3 +24,12 @@ export interface Tag {
2 }
3
4 export type TagMap = { [key: string]: Tag };
5+
6+export interface Resource {
7+ type: 'github';
8+ username: string;
9+ repo: string;
10+ tags: string[];
11+}
12+
13+export type ResourceMap = { [key: string]: Resource };
+49,
-100
1@@ -1,9 +1,9 @@
2-import fetch from 'node-fetch';
3-import marked from 'marked';
4 import fs from 'fs';
5 import util from 'util';
6
7-import type { Plugin } from './lib/types';
8+import type { Plugin, Resource } from './lib/types';
9+import { createPlugin } from './lib/entities';
10+import * as resourceFile from './lib/resources.json';
11
12 const writeFile = util.promisify(fs.writeFile);
13 const accessToken = process.env.GITHUB_ACCESS_TOKEN || '';
14@@ -12,10 +12,13 @@ const accessUsername = process.env.GITHUB_USERNAME || '';
15 interface Props {
16 username: string;
17 repo: string;
18- branch: string;
19 }
20
21-async function fetchReadme({ username, repo, branch }: Props): Promise<Resp<string>> {
22+async function fetchReadme({
23+ username,
24+ repo,
25+ branch,
26+}: Props & { branch: string }): Promise<Resp<string>> {
27 const url = `https://raw.githubusercontent.com/${username}/${repo}/${branch}/README.md`;
28 console.log(`Fetching ${url}`);
29 const res = await fetch(url);
30@@ -89,107 +92,53 @@ async function fetchGithubData(props: Props): Promise<Resp<any>> {
31 };
32 }
33
34-async function fetchMarkdown() {
35- const response = await fetch(
36- 'https://raw.githubusercontent.com/rockerBOO/awesome-neovim/main/README.md',
37- );
38- const text = await response.text();
39- return text;
40-}
41-
42-const createPlugin = (p: Partial<Plugin> = {}): Plugin => {
43- return {
44- id: '',
45- name: '',
46- username: '',
47- repo: '',
48- link: '',
49- tags: [],
50- homepage: '',
51- description: '',
52- branch: 'main',
53- openIssues: 0,
54- watchers: 0,
55- forks: 0,
56- stars: 0,
57- subscribers: 0,
58- network: 0,
59- ...p,
60- };
61-};
62-
63-function sanitizeTag(tag: string) {
64- if (tag === '(requires neovim 0.5)') return 'neovim-0.5';
65- if (tag === 'treesitter supported colorschemes') return 'treesitter-colorschemes';
66- return tag.toLocaleLowerCase().replace(/\s/g, '-');
67-}
68-
69-async function processMarkdown(text: string) {
70- const data: Plugin[] = [];
71- const tree = marked.lexer(text);
72- let heading = '';
73- tree.forEach((token) => {
74- if (token.type === 'heading') {
75- heading = token.text.toLocaleLowerCase();
76- }
77-
78- if (token.type === 'list') {
79- token.items.forEach((t) => {
80- (t as any).tokens.forEach((tt: any) => {
81- if (!tt.tokens) return;
82- if (heading === 'contents') return;
83- const plugin = createPlugin({ tags: [sanitizeTag(heading)] });
84- tt.tokens.forEach((a: any) => {
85- if (a.type === 'link') {
86- plugin.link = a.href;
87- const href = a.href
88- .replace('https://github.com/', '')
89- .replace('http://github.com', '');
90- const d = href.split('/');
91- plugin.username = d[0];
92- plugin.repo = d[1];
93- plugin.id = `${d[0]}/${d[1]}`;
94- }
95- });
96- if (!plugin.link.includes('github.com')) return;
97- data.push(plugin);
98- });
99- });
100- }
101- });
102-
103+async function processResources(resources: Resource[]) {
104 const plugins: { [key: string]: Plugin } = {};
105 const markdown: { [key: string]: string } = {};
106- for (let i = 0; i < data.length; i += 1) {
107- const d = data[i];
108- const result = await fetchGithubData(d);
109- if (result.ok) {
110- const resp = result.data;
111- const id = `${d.username}/${d.repo}`;
112-
113- markdown[id] = resp.readme;
114- plugins[id] = createPlugin({
115- id,
116- username: d.username,
117- repo: d.repo,
118- name: d.name,
119- link: d.link,
120- tags: d.tags,
121- homepage: resp.repo.homepage,
122- branch: resp.repo.default_branch,
123- openIssues: resp.repo.open_issues_count,
124- watchers: resp.repo.watchers_count,
125- forks: resp.repo.forks,
126- stars: resp.repo.stargazers_count,
127- subscribers: resp.repo.subscribers_count,
128- network: resp.repo.network_count,
129- description: resp.repo.description,
130- });
131+ for (let i = 0; i < resources.length; i += 1) {
132+ const d = resources[i];
133+
134+ if (d.type === 'github') {
135+ const result = await fetchGithubData(d);
136+ if (result.ok) {
137+ const resp = result.data;
138+ const id = `${d.username}/${d.repo}`;
139+
140+ markdown[id] = resp.readme;
141+ plugins[id] = createPlugin({
142+ id,
143+ username: d.username,
144+ repo: d.repo,
145+ tags: d.tags,
146+ name: resp.repo.name,
147+ link: resp.repo.html_url,
148+ homepage: resp.repo.homepage,
149+ branch: resp.repo.default_branch,
150+ openIssues: resp.repo.open_issues_count,
151+ watchers: resp.repo.watchers_count,
152+ forks: resp.repo.forks,
153+ stars: resp.repo.stargazers_count,
154+ subscribers: resp.repo.subscribers_count,
155+ network: resp.repo.network_count,
156+ description: resp.repo.description,
157+ });
158+ }
159 }
160 }
161
162+ return { plugins, markdown };
163+}
164+
/**
 * Persists the processed plugin data to disk.
 *
 * Writes `{ plugins }` to `./src/lib/db.json` and `{ markdown }` to
 * `./src/lib/markdown.json`, one after the other.
 *
 * `resources` is accepted but never read. It is optional so that the value
 * resolved by `processResources` (which carries only `plugins` and
 * `markdown`) can be passed straight in via `.then(saveData)` without a type
 * error under strict function-type checking.
 *
 * @param data - Plugin records keyed by id, README text keyed by id, and an
 *   optional list of resources.
 * @returns A promise that settles once both files have been written.
 */
async function saveData({
  plugins,
  markdown,
}: {
  plugins: { [key: string]: Plugin };
  markdown: { [key: string]: string };
  resources?: Resource[];
}): Promise<void> {
  await writeFile('./src/lib/db.json', JSON.stringify({ plugins }));
  await writeFile('./src/lib/markdown.json', JSON.stringify({ markdown }));
}
176
177-fetchMarkdown().then(processMarkdown).catch(console.error);
178+processResources(resourceFile.resources).then(saveData).catch(console.error);
+85,
-0
1@@ -0,0 +1,85 @@
2+import fs from 'fs';
3+import util from 'util';
4+
5+import fetch from 'node-fetch';
6+import marked from 'marked';
7+
8+import type { Resource, ResourceMap } from './lib/types';
9+import { createResource } from './lib/entities';
10+import * as resourceFile from './lib/resources.json';
11+
12+const writeFile = util.promisify(fs.writeFile);
13+
/**
 * Downloads the document at `url` and returns its body as text.
 *
 * @param url - URL of the markdown document to download.
 * @returns The response body decoded as text.
 * @throws Error when the server answers with a non-OK status, so an error
 *   page is never handed to the markdown parser as if it were the document.
 */
async function fetchMarkdown(url: string): Promise<string> {
  const response = await fetch(url);
  if (!response.ok) {
    throw new Error(`Failed to fetch ${url}: ${response.status} ${response.statusText}`);
  }
  return response.text();
}
19+
/** Headings whose tag is not simply the hyphenated heading text. */
const TAG_ALIASES = new Map<string, string>([
  ['(requires neovim 0.5)', 'neovim-0.5'],
  ['treesitter supported colorschemes', 'treesitter-colorschemes'],
]);

/**
 * Converts a heading into a tag slug.
 *
 * The heading is trimmed, lower-cased and has runs of whitespace collapsed
 * before anything else happens, so the alias lookup and the generic path see
 * the same normalised text regardless of the caller's casing or spacing.
 *
 * @param tag - Heading text to convert.
 * @returns The aliased tag when one is defined for the heading, otherwise the
 *   normalised heading with each whitespace run replaced by a single `-`.
 */
function sanitizeTag(tag: string): string {
  const normalized = tag.trim().toLocaleLowerCase().replace(/\s+/g, ' ');

  const alias = TAG_ALIASES.get(normalized);
  if (alias !== undefined) return alias;

  return normalized.replace(/ /g, '-');
}
25+
/**
 * Matches `http(s)://github.com/<owner>/<repo>` (optionally with `www.`) and
 * captures the owner and repository segments. Anything after the repository
 * segment (extra path, query string, fragment) is ignored.
 */
const GITHUB_REPO_URL = /^https?:\/\/(?:www\.)?github\.com\/([^/?#]+)\/([^/?#]+)/i;

/** Extracts owner and repository from a GitHub repository URL, or returns `null`. */
function parseGithubRepo(href: string): { username: string; repo: string } | null {
  const match = GITHUB_REPO_URL.exec(href);
  if (!match) return null;

  const [, username, repo] = match;
  if (!username || !repo) return null;

  return { username, repo };
}

/**
 * Turns a markdown document into a list of GitHub resources.
 *
 * The document is tokenised with `marked.lexer`. Each list entry found under
 * a heading becomes one resource tagged with that heading (run through
 * `sanitizeTag`); lists under the `contents` heading are skipped.
 *
 * An entry is kept only when its link is a GitHub repository URL with both an
 * owner and a repository segment. `http://` and `https://` links are handled
 * the same way, and hosts that merely contain `github.com` (for example
 * `gist.github.com`) are rejected instead of producing a resource with a
 * bogus `username`/`repo`.
 *
 * @param text - Raw markdown source.
 * @returns The resources discovered in the document, in document order.
 */
async function processMarkdown(text: string): Promise<Resource[]> {
  const resources: Resource[] = [];
  let heading = '';

  marked.lexer(text).forEach((token) => {
    if (token.type === 'heading') {
      heading = token.text.toLocaleLowerCase();
      return;
    }

    // Only lists carry entries, and the table of contents is not one of them.
    if (token.type !== 'list' || heading === 'contents') return;

    const tag = sanitizeTag(heading);

    token.items.forEach((item) => {
      (item as any).tokens.forEach((child: any) => {
        if (!child.tokens) return;

        const links = child.tokens.filter((a: any) => a.type === 'link');
        if (links.length === 0) return;

        // When an entry carries several links, the last one identifies it.
        // NOTE(review): confirm that the last link, rather than the first, is
        // the intended choice for entries whose description contains links.
        const parsed = parseGithubRepo(links[links.length - 1].href);
        if (!parsed) return;

        resources.push(createResource({ ...parsed, tags: [tag] }));
      });
    });
  });

  return resources;
}
62+
/**
 * Merges freshly scraped resources into the stored resource list and writes
 * the result back to `./src/lib/resources.json`.
 *
 * Entries are keyed by `username/repo`. Stored entries are loaded first and
 * scraped entries second, so a scraped entry replaces a stored entry with the
 * same key while stored entries that were not scraped again are kept.
 *
 * @param resources - Resources produced by the current scrape.
 * @returns A promise that settles once the file has been written.
 */
async function updateResources(resources: Resource[]): Promise<void> {
  // The JSON import is typed from the file's contents, where `type` is a
  // plain `string`; narrow it once here so populated files still type-check.
  const stored = resourceFile.resources as Resource[];

  const db: ResourceMap = {};
  for (const resource of [...stored, ...resources]) {
    db[`${resource.username}/${resource.repo}`] = resource;
  }

  await writeFile('./src/lib/resources.json', JSON.stringify({ resources: Object.values(db) }));
}
75+
// Markdown documents to scrape for resources.
const urls = ['https://raw.githubusercontent.com/rockerBOO/awesome-neovim/main/README.md'];

// Download and parse every document, join the per-document lists into one
// list in document order, then merge that list into the stored resources.
// Any failure along the way is logged.
Promise.all(urls.map((url) => fetchMarkdown(url).then(processMarkdown)))
  .then((perDocument) => ([] as Resource[]).concat(...perDocument))
  .then(updateResources)
  .catch(console.error);