# --------
# source records using a base, and a glob sequence
# --------

source ./page.nu
source ./grammar.nu

# Turn a list of addresses into index records.
#
# Input:  list of addresses (null entries are dropped).
# Output: one { name, addr } record per address, where `addr` is the address
#         unchanged and `name` is a file name derived from it.
def "index shape" []: list<string> -> list<record<name: string, addr: string>> {
  $in | compact | each {|href|
    # The name is built from the last two path segments of the decoded
    # address, glued together with an underscore.
    let tail = $href | url decode | path split | last 2 | str join '_'
    let parts = $tail | path parse

    # Tidy only the stem so the extension survives:
    # spaces become dashes, dots and commas are removed.
    let stem = $parts.stem | str replace -a ' ' '-' | str replace -ar '[\.\,]' ''

    {
      name: ($parts | upsert stem $stem | path join),
      addr: $href,
    }
  }
}

# Extract the link targets of a page that match a pattern.
#
# Input:  a page body as text, such as the output of (page body -o).
# glob:   pattern tested against each href with `=~`, i.e. as a regular
#         expression.
# Output: the matching href values.
def "index links" [glob: string]: string -> list<string> {
  # pup emits every anchor element as JSON.
  let anchors = $in | pup 'a json{}' | from json

  $anchors
  | get -o href
  # presumably `assure` substitutes '' for a missing href so the match cannot
  # fail on null - confirm against its definition.
  | where {|target| ($target | assure {''}) =~ $glob }
}

# Resolve a link found on a page against that page's address.
#
# base: address of the page the link was found on.
# node: the link as written in the page.
# Returns an address:
#   - node unchanged when it already starts with http:// or https://
#   - the origin of base followed by node when node starts with '/'
#   - base and node joined as path segments otherwise
def "index rebase" [base: string, node: string] {
  let absolute = ($node | str starts-with 'http://') or ($node | str starts-with 'https://')
  if $absolute { return $node }

  if ($node | str starts-with '/') {
    # Rebuild the origin from scheme, credentials, host and port only.
    # Blanking just the path would keep base's query string and fragment,
    # which would then end up in front of the node.
    let domain = $base | url parse | select scheme username password host port | url join
    return ([$domain $node] | str join)
  }

  return ([$base $node] | path join)
}

# Derive a short label from an address: the last two dot-separated parts of
# its host, joined with an underscore.
def "index label" [base: string] {
  let host = ($base | url parse).host
  $host | split row '.' | last 2 | str join '_'
}

# Build a nested index of addresses reachable from a base page.
#
# base:    address to start from.
# course:  one link pattern per level; the first is applied to the base page,
#          the rest are handed to the recursive call for each page found.
# --pace:  pause after opening a page.
# --label: bundle name; derived with `index label` when not given
#          (assuming `assure` falls back to the closure for a missing value).
# --done:  when true, the finished index is saved to <bundle>.index.yml.
#          Recursive calls pass false so only the outermost call writes.
#
# Returns a list of { name, addr } records. When course is not empty, each
# record also carries `nodes`, the index built from that record's address
# with the remaining patterns.
def "index map" [
  base: string,
  course: list<string>,
  --pace (-p): duration = 1sec,
  --label (-l): string,
  --done = true,
] {
  let bundle = $label | assure { index label $base }
  let cache = [ $bundle index yml ] | str join '.'

  let index = if ($course | is-empty) {
    # Nothing left to follow: the index is just the base itself.
    # Computed as a value rather than `return [$base] | index shape`, where
    # `return` would fire before the shaping step and skip the save below.
    [$base] | index shape
  } else {
    # A page that cannot be read contributes no links; the page is closed
    # either way.
    let pass = try {
      page open $base; sleep $pace
      page body -o
      | index links ($course | first)
      | each { index rebase $base $in }
    } catch {[]}
    | tee { page close -o }

    let deeper = $course | slice 1..

    ([$base] ++ $pass) | index shape | each {|page| $page | insert nodes {
      index map $page.addr $deeper -l $bundle -p $pace --done false
    } }
  }

  $index | tee { if $done { $in | save $cache }}
}

# Download the documents listed in the index of a base address.
#
# base:     address the index is built from.
# --course: link patterns passed to `index map` when no cached index exists.
# --label:  bundle name; derived with `index label` when not given
#           (assuming `assure` falls back to the closure for a missing value).
#
# Reads the index from <bundle>.index.yml, building it first if the file is
# missing. Each document is fetched into the <bundle> directory under its
# `name`; documents whose file already exists are skipped. Entries that fail
# to download are appended to <bundle>.error.yml.
def "index load" [base: string, --course (-c): list<string> = [], --label (-l): string] {
  let bundle = $label | assure { index label $base }
  let cache = [ $bundle index yml ] | str join '.'
  let error = [ $bundle error yml ] | str join '.'
  if not ($cache | path exists) { index map $base $course -l $bundle }

  mkdir $bundle
  # An index entry contributes its `nodes` when it has them, otherwise itself.
  open $cache
  | each {|n| $n | get -o nodes | assure { [$n] } }
  | flatten
  | each {|doc|
    let sink = [$bundle $doc.name] | path join
    if not ($sink | path exists) {
      print $"pulling ($sink)"
      try {
        wget $doc.addr -O $sink
      } catch {
        # wget -O creates the target even when the download fails; remove it
        # so the existence check above does not skip this document next run.
        rm -f $sink
        [$doc] | to yaml | save -a $error
      }
    }
  }
}
