diff --git a/.gitmodules b/.gitmodules
new file mode 100644
index 0000000..330f0c8
--- /dev/null
+++ b/.gitmodules
@@ -0,0 +1,3 @@
+[submodule "go-epub"]
+	path = go-epub
+	url = https://github.com/go-shiori/go-epub
diff --git a/createepub/createepub.go b/createepub/createepub.go
new file mode 100644
index 0000000..ee4e6d4
--- /dev/null
+++ b/createepub/createepub.go
@@ -0,0 +1,250 @@
+// Package createepub bundles downloaded manga chapters into an EPUB file.
+package createepub
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"mangacrawler/mangacrawler"
+	"net/http"
+	"os"
+	"os/user"
+	"regexp"
+	"strings"
+
+	"github.com/go-shiori/go-epub"
+)
+
+var (
+	// chapterDirPattern matches the "chapter<number>" prefix of a chapter
+	// directory name and captures the number without leading zeros.
+	chapterDirPattern = regexp.MustCompile(`^chapter0*(\d+)`)
+	// bonusChapterPattern matches the "z<digits>" marker that mangacrawler.GetManga
+	// writes for the fractional part of a chapter number (e.g. "z5" for 10.5).
+	bonusChapterPattern = regexp.MustCompile(`^(z\d+)`)
+)
+
+// MangaPlus is the response envelope of the MangaDex manga endpoint when it
+// is queried with the author and cover_art includes.
+type MangaPlus struct {
+	Data MangaRelationships `json:"data"`
+}
+
+// MangaRelationships holds the manga attributes and its related entities.
+type MangaRelationships struct {
+	Rels []MangaRels     `json:"relationships"`
+	Attr MangaAttributes `json:"attributes"`
+}
+
+// MangaRels is one related entity; Type tells which kind it is.
+type MangaRels struct {
+	Attributes MangaAuthor `json:"attributes"`
+	Type       string      `json:"type"`
+}
+
+// MangaAuthor carries the attributes read from a relationship: the name of an
+// author entry and the file name of a cover_art entry.
+type MangaAuthor struct {
+	Name string `json:"name"`
+	File string `json:"fileName"`
+}
+
+// MangaAttributes holds the manga attributes used for the EPUB metadata.
+type MangaAttributes struct {
+	Desc MangaDesc `json:"description"`
+}
+
+// MangaDesc is the localized description; only English is read.
+type MangaDesc struct {
+	En string `json:"en"`
+}
+
+// CreateEpub builds "<home>/mangas/EPUB/<mangaTitle>.epub" from the chapter
+// directories found under mangaPath.
+//
+// Author, description and cover file name are fetched from the MangaDex API
+// for mangaId. The stylesheet is read from "<home>/mangas/EPUB/epub.css". A
+// missing stylesheet or cover is reported and skipped; failures to reach or
+// decode the API response panic and a failed write terminates the program.
+func CreateEpub(mangaPath string, mangaTitle string, mangaId string) {
+	var manga MangaPlus
+
+	url := "https://api.mangadex.org/manga/" + mangaId + "?includes[]=author&includes[]=cover_art"
+	data := mangacrawler.GetJson(url)
+	homepath, err := user.Current()
+	if err != nil {
+		panic(err)
+	}
+
+	if err := json.Unmarshal(data, &manga); err != nil {
+		panic(err)
+	}
+
+	// Rels also contains the cover_art entry and may be empty, so pick the
+	// author by type instead of assuming it is the first element.
+	var authorName, coverName string
+	for _, rel := range manga.Data.Rels {
+		switch rel.Type {
+		case "author":
+			if authorName == "" {
+				authorName = rel.Attributes.Name
+			}
+		case "cover_art":
+			coverName = rel.Attributes.File
+		}
+	}
+
+	epubDir := strings.Join([]string{homepath.HomeDir, "mangas/EPUB"}, "/")
+
+	book := epub.NewEpub(mangaTitle)
+	book.SetAuthor(authorName)
+	book.SetDescription(manga.Data.Attr.Desc.En)
+	bookCss, err := book.AddCSS(epubDir+"/epub.css", "")
+	if err != nil {
+		// The stylesheet only affects layout, so carry on without it.
+		log.Printf("stylesheet not added: %v", err)
+	}
+
+	fmt.Println("Downloading and adding cover page for EPUB")
+	if coverName == "" {
+		log.Print("no cover art listed for this manga, EPUB gets no cover")
+	} else {
+		coverPath, coverFile := getCoverPage(mangaId, coverName, mangaPath)
+		bookCover, err := book.AddImage(coverPath, coverFile)
+		if err != nil {
+			log.Printf("cover page not added: %v", err)
+		} else {
+			book.SetCover(bookCover, "")
+			fmt.Println("Cover page added")
+		}
+	}
+
+	fmt.Println("Adding pages to EPUB. Each chapter is a section\nIf chapter title is available that will be used for section title")
+	addPages(book, mangaPath, bookCss)
+
+	fmt.Println("Writing EPUB to disk...")
+	err = book.Write(epubDir + "/" + mangaTitle + ".epub")
+	if err != nil {
+		log.Fatal(err)
+	}
+}
+
+// addPages adds every chapter directory below mangaPath to book and returns
+// book. The first page of a chapter opens a new section titled by
+// chapterSectionTitle; the remaining pages become subsections of it. Entries
+// whose name is not a chapter directory name are ignored.
+func addPages(book *epub.Epub, mangaPath string, bookCss string) *epub.Epub {
+	chapters, err := os.ReadDir(mangaPath)
+	if err != nil {
+		panic(err)
+	}
+
+	for _, chapter := range chapters {
+		sectionTitle, ok := chapterSectionTitle(chapter.Name())
+		if !ok {
+			continue
+		}
+
+		chapterPath := strings.Join([]string{mangaPath, chapter.Name()}, "/")
+		pages, err := os.ReadDir(chapterPath)
+		if err != nil {
+			log.Printf("skipping %s: %v", chapter.Name(), err)
+			continue
+		}
+
+		// section stays empty until the chapter's section has been created, so
+		// a page that fails to load cannot leave the chapter without one.
+		var section string
+		for _, page := range pages {
+			bookPage, err := book.AddImage(chapterPath+"/"+page.Name(), page.Name())
+			if err != nil {
+				log.Printf("skipping page %s: %v", page.Name(), err)
+				continue
+			}
+
+			// NOTE(review): the format string arrived without a verb for
+			// bookPage; the <img> markup is a reconstruction - confirm it
+			// matches the intended page layout.
+			pageBody := fmt.Sprintf("<img src=\"%s\" />\n", bookPage)
+
+			if section == "" {
+				section, err = book.AddSection(pageBody, sectionTitle, "", bookCss)
+				if err != nil {
+					panic(err)
+				}
+				continue
+			}
+
+			if _, err := book.AddSubSection(section, pageBody, "", "", bookCss); err != nil {
+				log.Printf("page %s not added: %v", page.Name(), err)
+			}
+		}
+	}
+	return book
+}
+
+// chapterSectionTitle turns a chapter directory name of the form
+// "chapter<number>[-z<digits>][-<title>]" into an EPUB section title such as
+// "Chapter 10.5: Title". ok is false when name does not start with
+// "chapter" followed by a digit.
+func chapterSectionTitle(name string) (title string, ok bool) {
+	match := chapterDirPattern.FindStringSubmatch(name)
+	if match == nil {
+		return "", false
+	}
+
+	title = "Chapter " + match[1]
+	rest := strings.TrimPrefix(name[len(match[0]):], "-")
+
+	if bonus := bonusChapterPattern.FindString(rest); bonus != "" {
+		title += strings.Replace(bonus, "z", ".", 1)
+		rest = strings.TrimPrefix(rest[len(bonus):], "-")
+	}
+
+	// Whatever is left is the chapter title. It is used as-is so that titles
+	// starting with a digit or a non-letter still get a section.
+	if rest != "" {
+		title += ": " + rest
+	}
+	return title, true
+}
+
+// getCoverPage makes sure the cover image coverFile of mangaId exists in
+// mangaPath, downloading it when it is not there yet. It returns the path of
+// the image and coverFile. Download and file errors panic.
+func getCoverPage(mangaId string, coverFile string, mangaPath string) (string, string) {
+	coverPath := strings.Join([]string{mangaPath, coverFile}, "/")
+	if _, err := os.Stat(coverPath); err == nil {
+		return coverPath, coverFile
+	}
+
+	url := strings.Join([]string{"https://uploads.mangadex.org/covers", mangaId, coverFile}, "/")
+	result, err := http.Get(url)
+	if err != nil {
+		panic(err)
+	}
+	defer result.Body.Close()
+
+	// Without this check an error page would be stored as the cover image.
+	if result.StatusCode != http.StatusOK {
+		panic(fmt.Errorf("downloading cover %s: unexpected status %s", url, result.Status))
+	}
+
+	file, err := os.Create(coverPath)
+	if err != nil {
+		panic(err)
+	}
+	defer file.Close()
+
+	if _, err := io.Copy(file, result.Body); err != nil {
+		// Remove the partial file, otherwise the next run would reuse it.
+		os.Remove(coverPath)
+		panic(err)
+	}
+	if err := file.Close(); err != nil {
+		panic(err)
+	}
+
+	return coverPath, coverFile
+}
diff --git a/epub.css b/epub.css
new file mode 100644
index 0000000..c81f3f8
--- /dev/null
+++ b/epub.css
@@ -0,0 +1,3 @@
+img {
+	break-after: page
+}
diff --git a/go-epub b/go-epub
new file mode 160000
index 0000000..9229546
--- /dev/null
+++ b/go-epub
@@ -0,0 +1 @@
+Subproject commit 92295466371650445c7817072fad3227c78e4b2f
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..a8126c4
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,15 @@
+module mangacrawler
+
+go 1.21.0
+
+replace github.com/go-shiori/go-epub => ./go-epub
+
+require github.com/go-shiori/go-epub v1.1.0
+
+require (
+	github.com/gabriel-vasile/mimetype v1.4.2 // indirect
+	github.com/gofrs/uuid v4.4.0+incompatible // indirect
+	github.com/vincent-petithory/dataurl v1.0.0 // indirect
+	golang.org/x/net v0.13.0 // indirect
+	gopkg.in/yaml.v2 v2.4.0 // indirect
+)
diff --git a/go.sum b/go.sum
new file mode 100644
index 0000000..6b6d39e
--- /dev/null
+++ b/go.sum
@@ -0,0 +1,11 @@
+github.com/gabriel-vasile/mimetype v1.4.2 h1:w5qFW6JKBz9Y393Y4q372O9A7cUSequkh1Q7OhCmWKU=
+github.com/gabriel-vasile/mimetype v1.4.2/go.mod h1:zApsH/mKG4w07erKIaJPFiX0Tsq9BFQgN3qGY5GnNgA=
+github.com/gofrs/uuid v4.4.0+incompatible h1:3qXRTX8/NbyulANqlc0lchS1gqAVxRgsuW1YrTJupqA=
+github.com/gofrs/uuid v4.4.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
+github.com/vincent-petithory/dataurl v1.0.0 h1:cXw+kPto8NLuJtlMsI152irrVw9fRDX8AbShPRpg2CI=
+github.com/vincent-petithory/dataurl v1.0.0/go.mod h1:FHafX5vmDzyP+1CQATJn7WFKc9CvnvxyvZy6I1MrG/U=
+golang.org/x/net v0.13.0 h1:Nvo8UFsZ8X3BhAC9699Z1j7XQ3rsZnUUm7jfBEk1ueY=
+golang.org/x/net v0.13.0/go.mod h1:zEVYFnQC7m/vmpQFELhcD1EWkZlX69l4oqgmer6hfKA=
+gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
+gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY=
+gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ=
diff --git a/go.work b/go.work
new file mode 100644
index 0000000..b6937e0
--- /dev/null
+++ b/go.work
@@ -0,0 +1,6 @@
+go 1.21.0
+
+use (
+	.
+	./go-epub
+)
diff --git a/go.work.sum b/go.work.sum
new file mode 100644
index 0000000..0b6db89
--- /dev/null
+++ b/go.work.sum
@@ -0,0 +1,4 @@
+golang.org/x/crypto v0.11.0/go.mod h1:xgJhtzW8F9jGdVFWZESrid1U1bjeNy4zgy5cRr/CIio=
+golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/term v0.10.0/go.mod h1:lpqdcUyK/oCiQxvxVrppt5ggO2KCZ5QblwqPnfZ6d5o=
+golang.org/x/text v0.11.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..84ef6a8
--- /dev/null
+++ b/main.go
@@ -0,0 +1,127 @@
+package main
+
+import (
+	"flag"
+	"fmt"
+	"log"
+	"os"
+	"strings"
+
+	"mangacrawler/createepub"
+	"mangacrawler/mangacrawler"
+
+	"gopkg.in/yaml.v2"
+)
+
+// main downloads the mangas given by --file or as ID arguments, builds an
+// EPUB for each one that is new or changed, and stores the progress: written
+// back to the YAML file, or printed as YAML when IDs were passed as arguments.
+func main() {
+	// get infos for the manga we want to download
+	var file string
+	var forceDl bool
+	var forceEpub bool
+	var mangas []mangacrawler.MangaYaml
+	var skipDl bool
+
+	flag.BoolVar(&forceEpub, "force-epub", false, "Flag for creating an EPUB from the manga")
+	flag.BoolVar(&skipDl, "skip-download", false, "Flag for not downloading the manga")
+	flag.BoolVar(&forceDl, "force-download", false, "Download already downloaded chapters")
+	flag.StringVar(&file, "file", "", "File with manga IDs. If not provided you need to add manga IDs as arguments")
+	flag.Parse()
+
+	if len(flag.Args()) == 0 && file == "" {
+		fmt.Printf("Usage: %s [options] [--file /path/to/yaml] or [id1 id2 ...]\n\nParameters:\n", os.Args[0])
+		flag.PrintDefaults()
+		os.Exit(0)
+	}
+
+	if file != "" {
+		mangas = parseFile(file)
+	} else {
+		for _, id := range flag.Args() {
+			// Start below chapter 0 so that every chapter counts as new.
+			mangas = append(mangas, mangacrawler.MangaYaml{ID: id, Chapter: -1})
+		}
+	}
+
+	homepath, err := os.UserHomeDir()
+	if err != nil {
+		log.Fatal(err)
+	}
+	epubPath := strings.Join([]string{homepath, "mangas/EPUB"}, "/")
+
+	for i, manga := range mangas {
+		manga.Name, manga.Completed = mangacrawler.GetMangaInfo(manga)
+		var newChapter bool
+
+		mangapath := strings.Join([]string{homepath, "mangas/MangaDex", manga.Name}, "/")
+		if err := os.MkdirAll(mangapath, 0755); err != nil {
+			log.Fatal(err)
+		}
+
+		if (!manga.Completed && !skipDl) || forceDl {
+			manga, newChapter = mangacrawler.GetManga(manga, mangapath, forceDl)
+		} else if manga.Completed {
+			fmt.Print(" Manga already completed!\n\n")
+		}
+
+		if _, err := os.Stat(epubPath + "/" + manga.Name + ".epub"); err != nil || forceEpub || newChapter {
+			if err := os.MkdirAll(epubPath, 0755); err != nil {
+				log.Fatal(err)
+			}
+			fmt.Println("Generating EPUB")
+			createepub.CreateEpub(mangapath, manga.Name, manga.ID)
+			fmt.Printf("EPUB created and saved under: %s\n\n", epubPath)
+		} else {
+			fmt.Print("EPUB exists already!\n\n")
+		}
+
+		mangas[i] = manga
+	}
+
+	if file != "" {
+		writeFile(file, mangas)
+		return
+	}
+
+	yamlPrint, err := yaml.Marshal(&mangas)
+	if err != nil {
+		log.Fatal(err)
+	}
+	fmt.Println(string(yamlPrint))
+}
+
+// parseFile reads the YAML list of mangas stored in file. A relative path is
+// resolved against the current working directory. The program terminates
+// when the file cannot be read; malformed YAML panics.
+func parseFile(file string) []mangacrawler.MangaYaml {
+	var yamlData []mangacrawler.MangaYaml
+
+	// os.ReadFile already resolves relative paths against the working
+	// directory and reports a missing file, so no separate Stat is needed.
+	fBytes, err := os.ReadFile(file)
+	if err != nil {
+		log.Fatal("Cannot read file: ", err)
+	}
+
+	if err := yaml.Unmarshal(fBytes, &yamlData); err != nil {
+		panic(err)
+	}
+
+	return yamlData
+}
+
+// writeFile stores mangas as YAML in file, replacing its previous content.
+// The program terminates when the file cannot be written, so that lost
+// progress does not go unnoticed.
+func writeFile(file string, mangas []mangacrawler.MangaYaml) {
+	data, err := yaml.Marshal(&mangas)
+	if err != nil {
+		panic(err)
+	}
+
+	if err := os.WriteFile(file, data, 0644); err != nil {
+		log.Fatal("Cannot write file: ", err)
+	}
+}
diff --git a/mangacrawler/chapterdownload.go b/mangacrawler/chapterdownload.go
new file mode 100644
index 0000000..3ce6b07
--- /dev/null
+++ b/mangacrawler/chapterdownload.go
@@ -0,0 +1,124 @@
+package mangacrawler
+
+import (
+	"encoding/json"
+	"fmt"
+	"io"
+	"log"
+	"net/http"
+	"os"
+	"regexp"
+	"strings"
+	"time"
+)
+
+// maxPageAttempts bounds how often a page download is tried before giving up.
+const maxPageAttempts = 5
+
+// leadingNonDigit matches a single non-digit character at the start of a page
+// file name.
+var leadingNonDigit = regexp.MustCompile(`^\D`)
+
+// Chapter is the response of the MangaDex at-home server endpoint.
+type Chapter struct {
+	Url  string      `json:"baseUrl"`
+	Data ChapterData `json:"chapter"`
+}
+
+// ChapterData lists the page file names of a chapter and the hash that is
+// part of their download URL.
+type ChapterData struct {
+	Pages []string `json:"data"`
+	Hash  string   `json:"hash"`
+}
+
+// chapterDownload downloads every page of the chapter chapterId into
+// chapterPath. chapterNo is used as prefix of the page file names.
+func chapterDownload(chapterId string, chapterPath string, chapterNo string) {
+	var pages Chapter
+
+	url := "https://api.mangadex.org/at-home/server/" + chapterId
+	data := GetJson(url)
+
+	if err := json.Unmarshal(data, &pages); err != nil {
+		log.Fatal(err)
+	}
+
+	for _, page := range pages.Data.Pages {
+		url = strings.Join([]string{pages.Url, "data", pages.Data.Hash, page}, "/")
+		pageDownload(url, chapterPath, page, chapterNo)
+	}
+}
+
+// pageDownload stores the page found at url as
+// "<path>/chapter<chapterNo>_<name>", where name is page without a leading
+// non-digit character and with its stem padded to 67 characters by the
+// "%067s" verb. A page that already exists on disk is not downloaded again.
+//
+// A response other than 200 is retried up to maxPageAttempts times with a
+// growing pause. Transport and file errors, and running out of attempts,
+// panic.
+func pageDownload(url string, path string, page string, chapterNo string) {
+	filepage := page
+	if prefix := leadingNonDigit.FindString(filepage); prefix != "" {
+		filepage = filepage[len(prefix):]
+	}
+
+	// Split at the last dot only: names without an extension no longer index
+	// out of range and names with several dots keep their real extension.
+	stem, ext := filepage, ""
+	if dot := strings.LastIndex(filepage, "."); dot >= 0 {
+		stem, ext = filepage[:dot], filepage[dot:]
+	}
+	filepage = fmt.Sprintf("%067s", stem) + ext
+
+	target := path + "/chapter" + chapterNo + "_" + filepage
+	if _, err := os.Stat(target); err == nil {
+		return
+	}
+
+	status := 0
+	for attempt := 1; attempt <= maxPageAttempts; attempt++ {
+		var err error
+		if status, err = fetchPage(url, target); err != nil {
+			panic(err)
+		}
+		if status == http.StatusOK {
+			fmt.Printf("Downloading: %s\n", filepage)
+			return
+		}
+		if attempt < maxPageAttempts {
+			time.Sleep(time.Duration(attempt) * time.Second)
+		}
+	}
+	panic(fmt.Errorf("downloading %s: status %d after %d attempts", url, status, maxPageAttempts))
+}
+
+// fetchPage requests url once and, on a 200 response, writes the body to
+// target. It returns the HTTP status code. The response body is closed before
+// returning, so repeated attempts do not pile up open connections.
+func fetchPage(url string, target string) (int, error) {
+	result, err := http.Get(url)
+	if err != nil {
+		return 0, err
+	}
+	defer result.Body.Close()
+
+	if result.StatusCode != http.StatusOK {
+		return result.StatusCode, nil
+	}
+
+	file, err := os.Create(target)
+	if err != nil {
+		return 0, err
+	}
+	defer file.Close()
+
+	if _, err := io.Copy(file, result.Body); err != nil {
+		// Remove the partial file, otherwise the next run would skip the page.
+		os.Remove(target)
+		return 0, err
+	}
+
+	return result.StatusCode, file.Close()
+}
diff --git a/mangacrawler/chaptersearch.go b/mangacrawler/chaptersearch.go
new file mode 100644
index 0000000..ca9b7aa
--- /dev/null
+++ b/mangacrawler/chaptersearch.go
@@ -0,0 +1,75 @@
+package mangacrawler
+
+import (
+	"encoding/json"
+	"strconv"
+)
+
+// Chapters is one page of the MangaDex chapter feed of a manga.
+type Chapters struct {
+	Data  []ChaptersData `json:"data"`
+	Total int            `json:"total"`
+}
+
+// ChaptersData is a single chapter entry of the feed.
+type ChaptersData struct {
+	Id         string             `json:"id"`
+	Attributes ChaptersAttributes `json:"attributes"`
+	Rels       []ChaptersRels     `json:"relationships"`
+}
+
+// ChaptersAttributes holds the chapter attributes that are read from the feed.
+type ChaptersAttributes struct {
+	Volume   string `json:"volume"`
+	Chapter  string `json:"chapter"`
+	Title    string `json:"title"`
+	Language string `json:"translatedLanguage"`
+}
+
+// ChaptersRels is a relationship of a chapter entry.
+type ChaptersRels struct {
+	RelsAttr RelsAttributes `json:"attributes"`
+}
+
+// RelsAttributes holds the name of a related entity.
+type RelsAttributes struct {
+	Name string `json:"name"`
+}
+
+// feedPageSize is the number of entries the feed is assumed to return per
+// request when no limit is given.
+const feedPageSize = 100
+
+// getChapterInfo returns the English chapters of mangaId, collected from all
+// pages of its chapter feed. Malformed responses panic.
+func getChapterInfo(mangaId string) []ChaptersData {
+	var feed Chapters
+
+	url := "https://api.mangadex.org/manga/" + mangaId + "/feed"
+	if err := json.Unmarshal(GetJson(url), &feed); err != nil {
+		panic(err)
+	}
+
+	for offset := feedPageSize; offset < feed.Total; offset += feedPageSize {
+		// Decode every page into a fresh value so that nothing left over from
+		// the previous page can survive in fields that are null or absent in
+		// this one.
+		var page Chapters
+
+		pageUrl := url + "?offset=" + strconv.Itoa(offset)
+		if err := json.Unmarshal(GetJson(pageUrl), &page); err != nil {
+			panic(err)
+		}
+
+		feed.Data = append(feed.Data, page.Data...)
+	}
+
+	var english []ChaptersData
+	for _, chapter := range feed.Data {
+		if chapter.Attributes.Language == "en" {
+			english = append(english, chapter)
+		}
+	}
+
+	return english
+}
diff --git a/mangacrawler/mangacrawler.go b/mangacrawler/mangacrawler.go
new file mode 100644
index 0000000..23660cb
--- /dev/null
+++ b/mangacrawler/mangacrawler.go
@@ -0,0 +1,94 @@
+// Package mangacrawler downloads manga chapters from the MangaDex API.
+package mangacrawler
+
+import (
+	"fmt"
+	"io"
+	"net/http"
+	"os"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// MangaYaml is the per-manga state that is kept between runs.
+type MangaYaml struct {
+	Name      string
+	ID        string
+	Chapter   float64
+	Completed bool
+}
+
+// GetManga downloads the chapters of manga into filepath. A chapter is
+// downloaded when its number is greater than manga.Chapter, or always when
+// forceDl is set. Each chapter gets its own directory named
+// "chapter<number>[-z<fraction>][-<title>]".
+//
+// It returns manga with Chapter raised to the highest chapter number seen and
+// whether any chapter was downloaded.
+func GetManga(manga MangaYaml, filepath string, forceDl bool) (MangaYaml, bool) {
+	chaptersData := getChapterInfo(manga.ID)
+	newChapter := false
+	latestChapter := manga.Chapter
+
+	// set subdirs for chapters in style chapter-name
+	for _, chapter := range chaptersData {
+		// A chapter number that does not parse (e.g. empty) counts as 0.
+		chapterIndex, _ := strconv.ParseFloat(chapter.Attributes.Chapter, 32)
+		if chapterIndex > manga.Chapter || forceDl {
+			newChapter = true
+			chapterChapter := fmt.Sprintf("%03s", chapter.Attributes.Chapter)
+			extraChapter := strings.Split(chapterChapter, ".")
+			if len(extraChapter) > 1 {
+				chapterChapter = strings.Join([]string{fmt.Sprintf("%03s", extraChapter[0]), "z" + extraChapter[1]}, "-")
+			}
+			chapterTitle := chapter.Attributes.Title
+
+			fmt.Printf("Working on Chapter: %s %s\n", chapterChapter, chapterTitle)
+			chapterDir := "chapter" + chapterChapter
+			if len(chapterTitle) > 0 {
+				// The title becomes part of a directory name, so it must not
+				// contain a path separator.
+				chapterDir += "-" + strings.ReplaceAll(chapterTitle, "/", "-")
+			}
+			chapterpath := strings.Join([]string{filepath, chapterDir}, "/")
+			if err := os.MkdirAll(chapterpath, 0755); err != nil {
+				panic(err)
+			}
+			chapterDownload(chapter.Id, chapterpath, chapterChapter)
+			fmt.Println()
+			time.Sleep(1 * time.Second)
+		}
+		if chapterIndex > latestChapter {
+			latestChapter = chapterIndex
+		}
+	}
+	if !newChapter {
+		fmt.Print(" No new chapter released yet!\n\n")
+	}
+
+	manga.Chapter = latestChapter
+
+	return manga, newChapter
+}
+
+// GetJson fetches url and returns the response body. Transport errors and
+// responses other than 200 panic, so that callers never decode an error page.
+func GetJson(url string) []byte {
+	response, err := http.Get(url)
+	if err != nil {
+		panic(err)
+	}
+	defer response.Body.Close()
+
+	if response.StatusCode != http.StatusOK {
+		panic(fmt.Errorf("GET %s: unexpected status %s", url, response.Status))
+	}
+
+	data, err := io.ReadAll(response.Body)
+	if err != nil {
+		panic(err)
+	}
+
+	return data
+}
diff --git a/mangacrawler/mangasearch.go b/mangacrawler/mangasearch.go
new file mode 100644
index 0000000..7d22849
--- /dev/null
+++ b/mangacrawler/mangasearch.go
@@ -0,0 +1,120 @@
+package mangacrawler
+
+import (
+	"bufio"
+	"encoding/json"
+	"fmt"
+	"os"
+	"strconv"
+	"strings"
+)
+
+// Manga is the response envelope of the MangaDex manga endpoint.
+type Manga struct {
+	Data MangaData `json:"data"`
+}
+
+// MangaData wraps the attributes of a manga.
+type MangaData struct {
+	Attributes MangaAttributes `json:"attributes"`
+}
+
+// MangaAttributes holds the titles and publication state of a manga.
+type MangaAttributes struct {
+	Title       Titles   `json:"title"`
+	AltTitle    []Titles `json:"altTitles"`
+	Status      string   `json:"status"`
+	LastChapter string   `json:"lastChapter"`
+}
+
+// Titles is a title in the two languages this program reads.
+type Titles struct {
+	JP string `json:"ja-ro"`
+	EN string `json:"en"`
+}
+
+// preferred returns the "en" title, or the "ja-ro" one when there is no "en"
+// title. It is empty when neither is set.
+func (t Titles) preferred() string {
+	if t.EN != "" {
+		return t.EN
+	}
+	return t.JP
+}
+
+// GetMangaInfo looks up the manga mangaYaml.ID and returns the title to use
+// as its directory name and whether it is completed.
+//
+// A title that already has a directory below "<home>/mangas/MangaDex" is
+// returned directly; otherwise the user is asked to choose one on stdin. The
+// manga counts as completed when its status is "completed" and its last
+// chapter is unknown or not newer than mangaYaml.Chapter.
+func GetMangaInfo(mangaYaml MangaYaml) (string, bool) {
+	var manga Manga
+	status := false
+	homepath, err := os.UserHomeDir()
+	if err != nil {
+		panic(err)
+	}
+
+	url := "https://api.mangadex.org/manga/" + mangaYaml.ID
+	data := GetJson(url)
+	if err := json.Unmarshal(data, &manga); err != nil {
+		panic(err)
+	}
+	attrs := manga.Data.Attributes
+
+	// NOTE(review): a completed manga without lastChapter is reported as
+	// completed even when mangaYaml.Chapter shows nothing was downloaded yet -
+	// confirm this is intended.
+	mangaLastChapter, _ := strconv.ParseFloat(attrs.LastChapter, 32)
+	if attrs.Status == "completed" && (mangaLastChapter <= mangaYaml.Chapter || attrs.LastChapter == "") {
+		status = true
+	}
+
+	// Collect the candidate titles, skipping entries without a usable title so
+	// that an empty string can never be chosen as directory name.
+	var mangaTitles []string
+	if title := attrs.Title.preferred(); title != "" {
+		mangaTitles = append(mangaTitles, title)
+	}
+	for _, alt := range attrs.AltTitle {
+		if title := alt.preferred(); title != "" {
+			mangaTitles = append(mangaTitles, title)
+		}
+	}
+	if len(mangaTitles) == 0 {
+		fmt.Printf("No title found! Using ID: %s\n", mangaYaml.ID)
+		return mangaYaml.ID, status
+	}
+
+	for _, title := range mangaTitles {
+		if _, err := os.Stat(strings.Join([]string{homepath, "mangas/MangaDex", title}, "/")); err == nil {
+			fmt.Printf("Title found on system! Using: %s\n", title)
+			return title, status
+		}
+	}
+
+	for i, title := range mangaTitles {
+		fmt.Printf("(%d): %s\n", i, title)
+	}
+
+	reader := bufio.NewReader(os.Stdin)
+	for {
+		fmt.Print("---\nPlease choose title for the manga: ")
+		selection, readErr := reader.ReadString('\n')
+		selection = strings.TrimSpace(selection)
+
+		// Empty input selects the first title, as does a closed stdin.
+		if selection == "" {
+			return mangaTitles[0], status
+		}
+		if choice, err := strconv.Atoi(selection); err == nil && choice >= 0 && choice < len(mangaTitles) {
+			return mangaTitles[choice], status
+		}
+		if readErr != nil {
+			return mangaTitles[0], status
+		}
+		fmt.Printf("Invalid choice %q, please enter a number between 0 and %d\n", selection, len(mangaTitles)-1)
+	}
+}