Convert HallOfBeornDump to CSV for ALEP pipeline

This commit is contained in:
Christian Nieves
2023-07-19 00:26:00 -05:00
parent 35f48a157e
commit 6d9b688b51
6 changed files with 65 additions and 40 deletions

View File

@ -14,10 +14,24 @@ import (
"github.com/bazelbuild/rules_go/go/tools/bazel"
strip "github.com/grokify/html-strip-tags-go"
"github.com/jessevdk/go-flags"
"github.com/squk/lotr/cmd/beornextract/types"
)
// Options holds the command-line flags accepted by the extractor.
type Options struct {
	// RawConversion (-r / --raw), when set, makes transformText return the
	// card text exactly as it was read instead of rewriting it.
	RawConversion bool `short:"r" long:"raw" description:"Enable to keep the original text from HallOfBeorn dump. Enable to prep for ALEP pipeline."`
}

// opts is the parsed flag set shared by the whole program. Its zero value
// leaves RawConversion off, which is the default.
var opts Options
func main() {
_, err := flags.ParseArgs(&opts, os.Args)
if err != nil {
panic(err)
}
fmt.Println("LOTR CARD PARSE")
f, err := bazel.Runfile(".")
if err != nil {
@ -36,8 +50,8 @@ func main() {
}
// Open our jsonFile
jsonFile, err := os.Open("cmd/beornextract/data/Bot.Cards.json")
// jsonFile, err := bazel.Runfile("cmd/beornextract/data/Bot.Cards.json")
// jsonFile, err := os.Open("cmd/beornextract/data/Bot.Cards.json")
jsonFile, err := os.Open("cmd/beornextract/data/Export.Cards.json")
// if os.Open returns an error then handle it
if err != nil {
fmt.Println(err)
@ -82,7 +96,7 @@ func main() {
card.TypeName,
card.SphereName,
card.Traits,
extractKeywords(card.Text),
findKeywords(card.Text),
card.Cost,
card.EngagementCost,
strconv.Itoa(card.Threat),
@ -90,8 +104,8 @@ func main() {
strconv.Itoa(card.Attack),
strconv.Itoa(card.Defense),
strconv.Itoa(card.Health),
"", // Quest Points
"", // Victory Points
card.QuestPoints,
strconv.Itoa(card.VictoryPoints),
"", // Special Icon
transformText(card.Name, card.Text),
card.Flavor,
@ -105,12 +119,18 @@ func main() {
}
func transformText(name, text string) string {
return strip.StripTags(
strings.ReplaceAll(text, name, "[name]"), // insert name tag
)
if opts.RawConversion {
return text
}
out := strings.ReplaceAll(text, name, "[name]") // insert name tag
out = strip.StripTags(out)
out = keywordPattern.ReplaceAllLiteralString(out, "")
return strings.TrimSpace(out)
}
// Keyword patterns are compiled once at package scope rather than per call.
var (
	// strictKeywordPattern matches a run of capitalized words, each followed
	// by "." or by " <digits>.", and each followed by exactly one whitespace
	// character.
	strictKeywordPattern = regexp.MustCompile(`((?:(?:[A-Z][a-z]+(\.|\s[0-9]+\.)\s)+))`)

	// keywordPattern is the same as strictKeywordPattern except that the
	// whitespace after each keyword is optional.
	keywordPattern = regexp.MustCompile(`((?:(?:[A-Z][a-z]+(\.|\s[0-9]+\.)\s*)+))`)
)

// extractKeywords returns the first run of keywords found in text, trimmed of
// surrounding whitespace, or "" when there is none. Unlike findKeywords, it
// requires whitespace after every keyword.
func extractKeywords(text string) string {
	return strings.TrimSpace(strictKeywordPattern.FindString(text))
}

// findKeywords returns the first run of keywords found in text, trimmed of
// surrounding whitespace, or "" when there is none.
func findKeywords(text string) string {
	return strings.TrimSpace(keywordPattern.FindString(text))
}