Squashed 'projects/bad-nlp/name-database/' content from commit 46d2e01a

git-subtree-dir: projects/bad-nlp/name-database
git-subtree-split: 46d2e01a4953b8f54e8625c9111cf5b372d80b49
This commit is contained in:
2023-03-20 18:03:18 -06:00
commit 2af495d056
669 changed files with 423076 additions and 0 deletions

View File

@@ -0,0 +1,59 @@
# Rake tasks that merge the 1990 census name lists under sources/census-1990/
# into the name database held in @db.
#
# @db is not set up here; every task depends on the :db task, which is
# presumably defined elsewhere and assigns @db -- confirm against the main
# Rakefile.
desc "imports the census 1990 data files and merged the data with the existing data files"
task "import:census1990" => "import:census1990:run"

namespace "import:census1990" do
  # Calls the given block with the first whitespace-delimited field of each
  # line in +path+. Blank and whitespace-only lines are skipped so that a nil
  # name is never handed to the database, and leading whitespace is ignored
  # (argument-less String#split drops it).
  each_name = lambda do |path, &handler|
    File.foreach(path) do |line|
      name = line.split.first
      handler.call(name) if name
    end
  end

  # Tags every given name listed in +path+ with +gender+. A name that already
  # carries +opposite+ (or is already "unisex") becomes "unisex" instead.
  tag_given_names = lambda do |path, gender, opposite|
    each_name.call(path) do |name|
      entry = @db.given_names.get(name)
      shared = [opposite, "unisex"].include?(entry.meta[:gender])
      entry.meta[:gender] = shared ? "unisex" : gender
    end
  end

  # Runs all three imports, then writes the database once.
  task :run => [:db, "import:census1990:male", "import:census1990:female", "import:census1990:family"] do
    @db.write
  end

  task :male => :db do
    tag_given_names.call("sources/census-1990/dist.male.first", "male", "female")
  end

  task :female => :db do
    tag_given_names.call("sources/census-1990/dist.female.first", "female", "male")
  end

  # Family names carry no metadata here; looking each one up via #get is the
  # only action taken (presumably #get creates missing entries -- confirm).
  task :family => :db do
    each_name.call("sources/census-1990/dist.all.last") do |name|
      @db.family_names.get(name)
    end
  end
end

View File

@@ -0,0 +1,20 @@
# Rake task that merges the family names from the 2000 census CSV
# (sources/census-2000/app_c.csv) into the name database held in @db.
#
# @db is not set up here; the task depends on :db, which is presumably defined
# elsewhere and assigns @db -- confirm against the main Rakefile.
desc "imports the census 2000 data files and merged the data with the existing data files"
task "import:census2000" => "import:census2000:run"

namespace "import:census2000" do
  task :run => :db do
    File.open("sources/census-2000/app_c.csv", 'r') do |file|
      file.gets # skip the header
      file.each_line do |line|
        # The name is the first comma-separated column. Strip it so that a
        # comma-less line does not keep its newline, and skip blank lines
        # rather than registering an empty/whitespace name.
        name = line.split(",").first.to_s.strip
        next if name.empty?

        @db.family_names.get(name)
      end
    end
    @db.write
  end
end