Update README.md
Browse files
README.md
CHANGED
@@ -7,6 +7,87 @@ license: apache-2.0
|
|
7 |
language:
|
8 |
- en
|
9 |
pipeline_tag: text-generation
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
---
|
11 |
|
12 |
# Huginn-0125
|
|
|
7 |
language:
|
8 |
- en
|
9 |
pipeline_tag: text-generation
|
10 |
+
datasets:
|
11 |
+
- HuggingFaceTB/smollm-corpus
|
12 |
+
- jon-tow/starcoderdata-python-edu
|
13 |
+
- ubaada/booksum-complete-cleaned
|
14 |
+
- euirim/goodwiki
|
15 |
+
- togethercomputer/RedPajama-Data-1T
|
16 |
+
- allenai/dolma
|
17 |
+
- bigcode/the-stack-v2-train-smol-ids
|
18 |
+
- bigcode/starcoderdata
|
19 |
+
- m-a-p/Matrix
|
20 |
+
- cerebras/SlimPajama-627B
|
21 |
+
- open-phi/textbooks
|
22 |
+
- open-phi/textbooks_grounded
|
23 |
+
- open-phi/programming_books_llama
|
24 |
+
- nampdn-ai/tiny-strange-textbooks
|
25 |
+
- nampdn-ai/tiny-textbooks
|
26 |
+
- nampdn-ai/tiny-code-textbooks
|
27 |
+
- nampdn-ai/tiny-orca-textbooks
|
28 |
+
- SciPhi/textbooks-are-all-you-need-lite
|
29 |
+
- vikp/textbook_quality_programming
|
30 |
+
- EleutherAI/proof-pile-2
|
31 |
+
- open-web-math/open-web-math
|
32 |
+
- biglam/blbooks-parquet
|
33 |
+
- storytracer/LoC-PD-Books
|
34 |
+
- GAIR/MathPile
|
35 |
+
- tomg-group-umd/CLRS-Text-train
|
36 |
+
- math-ai/AutoMathText
|
37 |
+
- bigcode/commitpackft
|
38 |
+
- bigcode/stack-dedup-python-fns
|
39 |
+
- vikp/python_code_instructions_filtered
|
40 |
+
- mlabonne/chessllm
|
41 |
+
- Waterhorse/chess_data
|
42 |
+
- EleutherAI/lichess-puzzles
|
43 |
+
- chargoddard/WebInstructSub-prometheus
|
44 |
+
- Locutusque/hercules-v5.0
|
45 |
+
- nvidia/OpenMathInstruct-1
|
46 |
+
- meta-math/MetaMathQA
|
47 |
+
- m-a-p/CodeFeedback-Filtered-Instruction
|
48 |
+
- nvidia/Daring-Anteater
|
49 |
+
- nvidia/sft_datablend_v1
|
50 |
+
- BAAI/Infinity-Instruct
|
51 |
+
- anthracite-org/Stheno-Data-Filtered
|
52 |
+
- Nopm/Opus_WritingStruct
|
53 |
+
- xinlai/Math-Step-DPO-10K
|
54 |
+
- bigcode/self-oss-instruct-sc2-exec-filter-50k
|
55 |
+
- HuggingFaceTB/everyday-conversations
|
56 |
+
- hkust-nlp/gsm8k-fix
|
57 |
+
- HuggingFaceH4/no_robots
|
58 |
+
- THUDM/LongWriter-6k
|
59 |
+
- THUDM/webglm-qa
|
60 |
+
- AlgorithmicResearchGroup/ArXivDLInstruct
|
61 |
+
- allenai/tulu-v2-sft-mixture-olmo-4096
|
62 |
+
- bigscience/P3
|
63 |
+
- Gryphe/Sonnet3.5-SlimOrcaDedupCleaned
|
64 |
+
- Gryphe/Opus-WritingPrompts
|
65 |
+
- nothingiisreal/Reddit-Dirty-And-WritingPrompts
|
66 |
+
- nothingiisreal/Kalomaze-Opus-Instruct-25k-filtered
|
67 |
+
- internlm/Lean-Github
|
68 |
+
- pkuAI4M/LeanWorkbook
|
69 |
+
- casey-martin/multilingual-mathematical-autoformalization
|
70 |
+
- AI4M/leandojo-informalized
|
71 |
+
- casey-martin/oa_cpp_annotate_gen
|
72 |
+
- l3lab/ntp-mathlib-instruct-st
|
73 |
+
- ajibawa-2023/Maths-College
|
74 |
+
- ajibawa-2023/Maths-Grade-School
|
75 |
+
- ajibawa-2023/General-Stories-Collection
|
76 |
+
- XinyaoHu/AMPS_mathematica
|
77 |
+
- XinyaoHu/AMPS_khan
|
78 |
+
- Magpie-Align/Magpie-Pro-MT-300K-v0.1
|
79 |
+
- Magpie-Align/Magpie-Reasoning-150K
|
80 |
+
- gair-prox/FineWeb-pro
|
81 |
+
- gair-prox/c4-pro
|
82 |
+
- gair-prox/RedPajama-pro
|
83 |
+
- gair-prox/open-web-math-pro
|
84 |
+
- togethercomputer/Long-Data-Collections
|
85 |
+
- emozilla/pg19
|
86 |
+
- MathGenie/MathCode-Pile
|
87 |
+
- KingNish/reasoning-base-20k
|
88 |
+
- nvidia/OpenMathInstruct-2
|
89 |
+
- LLM360/TxT360
|
90 |
+
- neuralwork/arxiver
|
91 |
---
|
92 |
|
93 |
# Huginn-0125
|