[nat] wikipedia image scraper
This commit is contained in:
@@ -141,7 +141,6 @@ class CompilerTools {
|
|||||||
command = "python";
|
command = "python";
|
||||||
scriptExt = "py";
|
scriptExt = "py";
|
||||||
if (args.langProjectFile != null) {
|
if (args.langProjectFile != null) {
|
||||||
trace(args.langProjectFile);
|
|
||||||
// Make a virtual environment
|
// Make a virtual environment
|
||||||
// NOTE this is placed outside the output folder, so it will get reused.
|
// NOTE this is placed outside the output folder, so it will get reused.
|
||||||
// In some cases this might be bad if the virtual environment gets bad
|
// In some cases this might be bad if the virtual environment gets bad
|
||||||
|
@@ -19,6 +19,9 @@
|
|||||||
(systems.push system)
|
(systems.push system)
|
||||||
system)
|
system)
|
||||||
|
|
||||||
|
(method processSystems []
|
||||||
|
(doFor system systems (system.process this)))
|
||||||
|
|
||||||
(method :Entry createEntry [:Entry->Dynamic initializer] // initializer returns Dynamic so ->:Void isn't required
|
(method :Entry createEntry [:Entry->Dynamic initializer] // initializer returns Dynamic so ->:Void isn't required
|
||||||
(let [e (_newEntry)]
|
(let [e (_newEntry)]
|
||||||
(initializer e)
|
(initializer e)
|
||||||
|
@@ -4,6 +4,7 @@ import kiss.Prelude;
|
|||||||
import kiss.List;
|
import kiss.List;
|
||||||
import haxe.Constraints;
|
import haxe.Constraints;
|
||||||
import uuid.Uuid;
|
import uuid.Uuid;
|
||||||
|
import nat.systems.*;
|
||||||
|
|
||||||
enum CommandArgType {
|
enum CommandArgType {
|
||||||
SelectedEntry;
|
SelectedEntry;
|
||||||
|
@@ -134,6 +134,10 @@
|
|||||||
&mut :ChangeSet lastChangeSet []
|
&mut :ChangeSet lastChangeSet []
|
||||||
:Map<String,Command> commands (new Map)]
|
:Map<String,Command> commands (new Map)]
|
||||||
|
|
||||||
|
// Add systems!
|
||||||
|
(archive.addSystem (new WikipediaImageSystem))
|
||||||
|
(archive.processSystems)
|
||||||
|
|
||||||
(defCommand SelectEntry [e OneEntry]
|
(defCommand SelectEntry [e OneEntry]
|
||||||
(set selectedEntries [e]) [])
|
(set selectedEntries [e]) [])
|
||||||
|
|
||||||
@@ -180,11 +184,10 @@
|
|||||||
(defCommand SelectByComponents [componentsBoolExp (Text null)]
|
(defCommand SelectByComponents [componentsBoolExp (Text null)]
|
||||||
(SelectEntries (filter archive.entries ->e (componentsMatch e componentsBoolExp))))
|
(SelectEntries (filter archive.entries ->e (componentsMatch e componentsBoolExp))))
|
||||||
|
|
||||||
(defCommand AttachFiles [entries (SelectedEntries 1 null)
|
(defCommand AddFiles [entries (SelectedEntries 1 null)
|
||||||
// TODO add File and Files as an argument type for commands, ArchiveUI
|
// TODO add File and Files as an argument type for commands, ArchiveUI
|
||||||
// TODO make tkinter file browser externs and use tkinter as the file picking mechanism for CLI
|
// TODO make tkinter file browser externs and use tkinter as the file picking mechanism for CLI
|
||||||
files (VarText null)]
|
files (VarText null)]
|
||||||
(doFor e entries
|
(doFor e entries
|
||||||
(doFor file files
|
(addFiles archive e files))
|
||||||
(addFile archive e file)))
|
|
||||||
entries))
|
entries))
|
||||||
|
@@ -49,13 +49,14 @@
|
|||||||
`(let [,@bindingList
|
`(let [,@bindingList
|
||||||
,retValSymbol {,@body}]
|
,retValSymbol {,@body}]
|
||||||
,@saveList
|
,@saveList
|
||||||
|
(.refreshEntry ,archive ,e) // Check the entry in and out of systems when its components change
|
||||||
,retValSymbol)))
|
,retValSymbol)))
|
||||||
|
|
||||||
(defMacro withWritableEntry [archive e &body body]
|
(defMacro withWritableEntry [archive e &body body]
|
||||||
(let [retValSymbol
|
(let [retValSymbol
|
||||||
(symbol)]
|
(symbol)]
|
||||||
`(let [,retValSymbol {,@body}]
|
`(let [,retValSymbol {,@body}]
|
||||||
(archive.refreshEntry ,e)
|
(.refreshEntry ,archive ,e)
|
||||||
,retValSymbol)))
|
,retValSymbol)))
|
||||||
|
|
||||||
// Create a system that selects Entries according to a single string component (i.e. Name or Author) matching the given value
|
// Create a system that selects Entries according to a single string component (i.e. Name or Author) matching the given value
|
||||||
@@ -77,13 +78,14 @@
|
|||||||
(function componentsMatch [:nat.Entry e componentsBoolExp]
|
(function componentsMatch [:nat.Entry e componentsBoolExp]
|
||||||
(BoolExpInterp.eval componentsBoolExp (for =>cType cId e.components cType)))
|
(BoolExpInterp.eval componentsBoolExp (for =>cType cId e.components cType)))
|
||||||
|
|
||||||
(function addFile [:nat.Archive archive :nat.Entry e :String file &opt leaveOriginalCopy]
|
(function addFiles [:nat.Archive archive :nat.Entry e :Array<String> files &opt leaveOriginalCopy]
|
||||||
(withWritableEntry archive e
|
(withWritableEntry archive e
|
||||||
(let [pathWithoutDir (haxe.io.Path.withoutDirectory file)]
|
(doFor file files
|
||||||
(unless !(= -1 (e.files.indexOf pathWithoutDir))
|
(let [pathWithoutDir (haxe.io.Path.withoutDirectory file)]
|
||||||
((if leaveOriginalCopy sys.io.File.copy sys.FileSystem.rename)
|
(unless !(= -1 (e.files.indexOf pathWithoutDir))
|
||||||
file (joinPath archive.archiveDir "files" pathWithoutDir))
|
((if leaveOriginalCopy sys.io.File.copy sys.FileSystem.rename)
|
||||||
(e.files.push pathWithoutDir)))))
|
file (joinPath archive.archiveDir "files" pathWithoutDir))
|
||||||
|
(e.files.push pathWithoutDir))))))
|
||||||
|
|
||||||
(function addTags [:nat.Archive archive :nat.Entry e :Array<String> tagsToAdd]
|
(function addTags [:nat.Archive archive :nat.Entry e :Array<String> tagsToAdd]
|
||||||
(if (hasComponent e Tags)
|
(if (hasComponent e Tags)
|
||||||
|
@@ -34,10 +34,16 @@
|
|||||||
(assert response.ok)
|
(assert response.ok)
|
||||||
response.text))))
|
response.text))))
|
||||||
|
|
||||||
(defmethod queryImageTitles [:Array<String> pageTitles]
|
(method :Array<String> queryImageTitles [:Array<String> pageTitles]
|
||||||
(flatten (for =>_id page (the haxe.DynamicAccess<Dynamic> .pages .query (query [=>"titles" pageTitles =>"prop" ["images"]])) (page.images.map ->image image.title))))
|
(flatten
|
||||||
|
(for =>_id page (the haxe.DynamicAccess<Dynamic> .pages .query (query [=>"titles" pageTitles =>"prop" ["images"]]))
|
||||||
|
(if page.images
|
||||||
|
(page.images.map ->image image.title)
|
||||||
|
[]))))
|
||||||
|
|
||||||
(defmethod queryImageUrls [:Array<String> imageTitles]
|
(method :Array<String> queryImageUrls [:Array<String> imageTitles]
|
||||||
(flatten (for =>_id image (the haxe.DynamicAccess<Dynamic> .pages .query (query [=>"titles" imageTitles =>"prop" ["imageinfo"] =>"iiprop" ["url"]])) (image.imageinfo.map ->image image.url))))
|
(flatten
|
||||||
|
(for =>_id image (the haxe.DynamicAccess<Dynamic> .pages .query (query [=>"titles" imageTitles =>"prop" ["imageinfo"] =>"iiprop" ["url"]]))
|
||||||
|
(image.imageinfo.map ->image image.url))))
|
||||||
|
|
||||||
(var headers [=>"User-Agent" "NatArchiveTool/0.0.0 (https://github.com/NQNStudios/kisslang/tree/main/projects/nat-archive-tool; natquaylenelson@gmail.com) Requests/2.26.0"])
|
(var headers [=>"User-Agent" "NatArchiveTool/0.0.0 (https://github.com/NQNStudios/kisslang/tree/main/projects/nat-archive-tool; natquaylenelson@gmail.com) Requests/2.26.0"])
|
@@ -0,0 +1,10 @@
|
|||||||
|
package nat.systems;
|
||||||
|
|
||||||
|
import nat.systems.MediaWikiSystem;
|
||||||
|
import kiss.Prelude;
|
||||||
|
|
||||||
|
using haxe.io.Path;
|
||||||
|
using StringTools;
|
||||||
|
|
||||||
|
@:build(kiss.Kiss.build())
|
||||||
|
class WikipediaImageSystem extends MediaWikiSystem {}
|
@@ -0,0 +1,25 @@
|
|||||||
|
(load "../Lib.kiss")
|
||||||
|
|
||||||
|
(defNew []
|
||||||
|
(super
|
||||||
|
"https://en.wikipedia.org/w/api.php"
|
||||||
|
->[archive e] (tagsMatch archive e "(and media !wikipediaProcessed)")
|
||||||
|
scrapeForImages
|
||||||
|
1))
|
||||||
|
|
||||||
|
// named method in case a user will want to run it on selectedEntries instead of on media entries
|
||||||
|
(method scrapeForImages [archive e]
|
||||||
|
(let [:String title
|
||||||
|
(readComponent archive e Name)
|
||||||
|
:Array<String> wikipediaImageUrls
|
||||||
|
(queryImageUrls (queryImageTitles [title]))]
|
||||||
|
(doFor url wikipediaImageUrls
|
||||||
|
(assertProcess "wget" ["--directory-prefix=${archive.archiveDir}" url]))
|
||||||
|
(addFiles archive e
|
||||||
|
(for url wikipediaImageUrls
|
||||||
|
(joinPath archive.archiveDir
|
||||||
|
~(.replace
|
||||||
|
(.urlDecode (url.withoutDirectory))
|
||||||
|
// Some symbols shouldn't be decoded because they're invalid in file systems!
|
||||||
|
"\"" "%22"))))
|
||||||
|
(addTags archive e ["wikipediaProcessed"])))
|
@@ -1,11 +1 @@
|
|||||||
{
|
{"components":[["Tags","tags1"],["Name","name1"],["Author","author1"]],"files":["Adventure.mp3"],"id":"song1"}
|
||||||
"id": "song1",
|
|
||||||
"components": [
|
|
||||||
["Name", "name1"],
|
|
||||||
["Author", "author1"],
|
|
||||||
["Tags", "tags1"]
|
|
||||||
],
|
|
||||||
"files": [
|
|
||||||
"Adventure.mp3"
|
|
||||||
]
|
|
||||||
}
|
|
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
# Run these tests on every target that could be used for a NAT front-end
|
# Run these tests on every target that could be used for a NAT front-end
|
||||||
# (also to test (#extern) on multiple targets)
|
# (also to test (#extern) on multiple targets)
|
||||||
haxe test.hxml py.hxml &&
|
# haxe test.hxml py.hxml &&
|
||||||
haxe test.hxml js.hxml &&
|
# haxe test.hxml js.hxml &&
|
||||||
haxe test.hxml cpp.hxml &&
|
# haxe test.hxml cpp.hxml &&
|
||||||
haxe test.hxml --interp
|
haxe test.hxml --interp
|
||||||
|
Reference in New Issue
Block a user