Changes
On 27 September 2022 11:23:58 UTC, collectivat:
- Changed title to Synthetic parallel corpora Ladino-English, Turkish, Spanish (previously Synthetic parallel corpora LAD-EN, TR)
f | 1 | { | f | 1 | { |
2 | "author": "", | 2 | "author": "", | ||
3 | "author_email": "", | 3 | "author_email": "", | ||
4 | "creator_user_id": "add784a5-9327-4063-a0e3-08eb34a96d52", | 4 | "creator_user_id": "add784a5-9327-4063-a0e3-08eb34a96d52", | ||
5 | "extras": [ | 5 | "extras": [ | ||
6 | { | 6 | { | ||
7 | "key": "image_url", | 7 | "key": "image_url", | ||
8 | "value": | 8 | "value": | ||
9 | a.sefarad.com.tr/uploads/group/2022-08-09-171438.401553ladinotext.jpg" | 9 | a.sefarad.com.tr/uploads/group/2022-08-09-171438.401553ladinotext.jpg" | ||
10 | } | 10 | } | ||
11 | ], | 11 | ], | ||
12 | "groups": [ | 12 | "groups": [ | ||
13 | { | 13 | { | ||
14 | "description": "Text corpora, parallel corpora", | 14 | "description": "Text corpora, parallel corpora", | ||
15 | "display_name": "Text dataset", | 15 | "display_name": "Text dataset", | ||
16 | "id": "e6125090-f7e9-4729-9a13-e4931090508a", | 16 | "id": "e6125090-f7e9-4729-9a13-e4931090508a", | ||
17 | "image_display_url": | 17 | "image_display_url": | ||
18 | .sefarad.com.tr/uploads/group/2022-08-09-171438.401553ladinotext.jpg", | 18 | .sefarad.com.tr/uploads/group/2022-08-09-171438.401553ladinotext.jpg", | ||
19 | "name": "text", | 19 | "name": "text", | ||
20 | "title": "Text dataset" | 20 | "title": "Text dataset" | ||
21 | } | 21 | } | ||
22 | ], | 22 | ], | ||
23 | "id": "5801010b-181a-45f2-b3d2-05861953ac63", | 23 | "id": "5801010b-181a-45f2-b3d2-05861953ac63", | ||
24 | "isopen": true, | 24 | "isopen": true, | ||
25 | "license_id": "cc-by", | 25 | "license_id": "cc-by", | ||
26 | "license_title": "Creative Commons Attribution", | 26 | "license_title": "Creative Commons Attribution", | ||
27 | "license_url": "http://www.opendefinition.org/licenses/cc-by", | 27 | "license_url": "http://www.opendefinition.org/licenses/cc-by", | ||
28 | "maintainer": "", | 28 | "maintainer": "", | ||
29 | "maintainer_email": "", | 29 | "maintainer_email": "", | ||
30 | "metadata_created": "2022-08-09T15:56:40.059317", | 30 | "metadata_created": "2022-08-09T15:56:40.059317", | ||
n | 31 | "metadata_modified": "2022-09-27T11:19:23.749145", | n | 31 | "metadata_modified": "2022-09-27T11:23:58.521920", |
32 | "name": "synthetic-parallel-data", | 32 | "name": "synthetic-parallel-data", | ||
33 | "notes": "Synthetically produced parallel data using rule-based | 33 | "notes": "Synthetically produced parallel data using rule-based | ||
34 | Spanish-Ladino translation.\r\n\r\nSizes:\r\n\r\nLadino-Turkish: | 34 | Spanish-Ladino translation.\r\n\r\nSizes:\r\n\r\nLadino-Turkish: | ||
35 | 4,574,021 sentences\r\n\r\nLadino-English: 5,748,012 | 35 | 4,574,021 sentences\r\n\r\nLadino-English: 5,748,012 | ||
36 | sentences\r\n\r\nTotal Ladino-Spanish: 10,322,033 sentences (This is | 36 | sentences\r\n\r\nTotal Ladino-Spanish: 10,322,033 sentences (This is | ||
37 | basically combination of the two corpora)\r\n\r\nPaper: | 37 | basically combination of the two corpora)\r\n\r\nPaper: | ||
38 | https://arxiv.org/abs/2205.15599\r\n\r\nLicense: CC-BY\r\n\r\nThis | 38 | https://arxiv.org/abs/2205.15599\r\n\r\nLicense: CC-BY\r\n\r\nThis | ||
39 | dataset is created as part of project \"Judeo-Spanish: Connecting the | 39 | dataset is created as part of project \"Judeo-Spanish: Connecting the | ||
40 | two ends of the Mediterranean\" carried out by Col\u00b7lectivaT and | 40 | two ends of the Mediterranean\" carried out by Col\u00b7lectivaT and | ||
41 | Sephardic Center of Istanbul within the framework of the \u201cGrant | 41 | Sephardic Center of Istanbul within the framework of the \u201cGrant | ||
42 | Scheme for Common Cultural Heritage: Preservation and Dialogue between | 42 | Scheme for Common Cultural Heritage: Preservation and Dialogue between | ||
43 | Turkey and the EU\u2013II (CCH-II)\u201d implemented by the Ministry | 43 | Turkey and the EU\u2013II (CCH-II)\u201d implemented by the Ministry | ||
44 | of Culture and Tourism of the Republic of Turkey with the financial | 44 | of Culture and Tourism of the Republic of Turkey with the financial | ||
45 | support of the European Union. The content of this website is the sole | 45 | support of the European Union. The content of this website is the sole | ||
46 | responsibility of Col\u00b7lectivaT and does not necessarily reflect | 46 | responsibility of Col\u00b7lectivaT and does not necessarily reflect | ||
47 | the views of the European Union.", | 47 | the views of the European Union.", | ||
48 | "num_resources": 3, | 48 | "num_resources": 3, | ||
49 | "num_tags": 1, | 49 | "num_tags": 1, | ||
50 | "organization": { | 50 | "organization": { | ||
51 | "approval_status": "approved", | 51 | "approval_status": "approved", | ||
52 | "created": "2022-08-09T08:55:19.044966", | 52 | "created": "2022-08-09T08:55:19.044966", | ||
53 | "description": "Col\u00b7lectivaT is a non-profit cooperative | 53 | "description": "Col\u00b7lectivaT is a non-profit cooperative | ||
54 | formed by knowledge workers that provides all-around services of | 54 | formed by knowledge workers that provides all-around services of | ||
55 | cultural translation, research and technological services for | 55 | cultural translation, research and technological services for | ||
56 | collaborative and linguistic work.", | 56 | collaborative and linguistic work.", | ||
57 | "id": "0510debc-4a80-4144-b198-def8e40d28d9", | 57 | "id": "0510debc-4a80-4144-b198-def8e40d28d9", | ||
58 | "image_url": "https://collectivat.cat/img/logo_sm.png", | 58 | "image_url": "https://collectivat.cat/img/logo_sm.png", | ||
59 | "is_organization": true, | 59 | "is_organization": true, | ||
60 | "name": "col-lectivat", | 60 | "name": "col-lectivat", | ||
61 | "state": "active", | 61 | "state": "active", | ||
62 | "title": "Col\u00b7lectivaT", | 62 | "title": "Col\u00b7lectivaT", | ||
63 | "type": "organization" | 63 | "type": "organization" | ||
64 | }, | 64 | }, | ||
65 | "owner_org": "0510debc-4a80-4144-b198-def8e40d28d9", | 65 | "owner_org": "0510debc-4a80-4144-b198-def8e40d28d9", | ||
66 | "private": false, | 66 | "private": false, | ||
67 | "relationships_as_object": [], | 67 | "relationships_as_object": [], | ||
68 | "relationships_as_subject": [], | 68 | "relationships_as_subject": [], | ||
69 | "resources": [ | 69 | "resources": [ | ||
70 | { | 70 | { | ||
71 | "cache_last_updated": null, | 71 | "cache_last_updated": null, | ||
72 | "cache_url": null, | 72 | "cache_url": null, | ||
73 | "created": "2022-08-09T15:57:05.916509", | 73 | "created": "2022-08-09T15:57:05.916509", | ||
74 | "description": "", | 74 | "description": "", | ||
75 | "format": "ZIP", | 75 | "format": "ZIP", | ||
76 | "hash": "", | 76 | "hash": "", | ||
77 | "id": "f267ee8d-6e12-48a0-9c6f-4ce41ee11255", | 77 | "id": "f267ee8d-6e12-48a0-9c6f-4ce41ee11255", | ||
78 | "last_modified": null, | 78 | "last_modified": null, | ||
79 | "metadata_modified": "2022-08-09T15:57:05.911158", | 79 | "metadata_modified": "2022-08-09T15:57:05.911158", | ||
80 | "mimetype": "application/zip", | 80 | "mimetype": "application/zip", | ||
81 | "mimetype_inner": null, | 81 | "mimetype_inner": null, | ||
82 | "name": "Ladino - English", | 82 | "name": "Ladino - English", | ||
83 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | 83 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | ||
84 | "position": 0, | 84 | "position": 0, | ||
85 | "resource_type": null, | 85 | "resource_type": null, | ||
86 | "size": null, | 86 | "size": null, | ||
87 | "state": "active", | 87 | "state": "active", | ||
88 | "url": | 88 | "url": | ||
89 | //collectivat.cat/share/ladino_synthetic_parallel_dataset_en.csv.zip", | 89 | //collectivat.cat/share/ladino_synthetic_parallel_dataset_en.csv.zip", | ||
90 | "url_type": null | 90 | "url_type": null | ||
91 | }, | 91 | }, | ||
92 | { | 92 | { | ||
93 | "cache_last_updated": null, | 93 | "cache_last_updated": null, | ||
94 | "cache_url": null, | 94 | "cache_url": null, | ||
95 | "created": "2022-08-09T15:57:38.631681", | 95 | "created": "2022-08-09T15:57:38.631681", | ||
96 | "description": "", | 96 | "description": "", | ||
97 | "format": "ZIP", | 97 | "format": "ZIP", | ||
98 | "hash": "", | 98 | "hash": "", | ||
99 | "id": "bac5f97b-9054-4ffe-9d8f-0022af74a508", | 99 | "id": "bac5f97b-9054-4ffe-9d8f-0022af74a508", | ||
100 | "last_modified": null, | 100 | "last_modified": null, | ||
101 | "metadata_modified": "2022-08-09T15:57:38.626254", | 101 | "metadata_modified": "2022-08-09T15:57:38.626254", | ||
102 | "mimetype": "application/zip", | 102 | "mimetype": "application/zip", | ||
103 | "mimetype_inner": null, | 103 | "mimetype_inner": null, | ||
104 | "name": "Ladino - Turkish", | 104 | "name": "Ladino - Turkish", | ||
105 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | 105 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | ||
106 | "position": 1, | 106 | "position": 1, | ||
107 | "resource_type": null, | 107 | "resource_type": null, | ||
108 | "size": null, | 108 | "size": null, | ||
109 | "state": "active", | 109 | "state": "active", | ||
110 | "url": | 110 | "url": | ||
111 | //collectivat.cat/share/ladino_synthetic_parallel_dataset_tr.csv.zip", | 111 | //collectivat.cat/share/ladino_synthetic_parallel_dataset_tr.csv.zip", | ||
112 | "url_type": null | 112 | "url_type": null | ||
113 | }, | 113 | }, | ||
114 | { | 114 | { | ||
115 | "cache_last_updated": null, | 115 | "cache_last_updated": null, | ||
116 | "cache_url": null, | 116 | "cache_url": null, | ||
117 | "created": "2022-09-27T11:19:23.761065", | 117 | "created": "2022-09-27T11:19:23.761065", | ||
118 | "description": "", | 118 | "description": "", | ||
119 | "format": "ZIP", | 119 | "format": "ZIP", | ||
120 | "hash": "", | 120 | "hash": "", | ||
121 | "id": "2f895bd6-c0f2-455b-9ae9-ff5678d8672f", | 121 | "id": "2f895bd6-c0f2-455b-9ae9-ff5678d8672f", | ||
122 | "last_modified": null, | 122 | "last_modified": null, | ||
123 | "metadata_modified": "2022-09-27T11:19:23.753453", | 123 | "metadata_modified": "2022-09-27T11:19:23.753453", | ||
124 | "mimetype": "application/zip", | 124 | "mimetype": "application/zip", | ||
125 | "mimetype_inner": null, | 125 | "mimetype_inner": null, | ||
126 | "name": "Ladino - Spanish", | 126 | "name": "Ladino - Spanish", | ||
127 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | 127 | "package_id": "5801010b-181a-45f2-b3d2-05861953ac63", | ||
128 | "position": 2, | 128 | "position": 2, | ||
129 | "resource_type": null, | 129 | "resource_type": null, | ||
130 | "size": null, | 130 | "size": null, | ||
131 | "state": "active", | 131 | "state": "active", | ||
132 | "url": "http://collectivat.cat/share/dataset_es-lad.csv.zip", | 132 | "url": "http://collectivat.cat/share/dataset_es-lad.csv.zip", | ||
133 | "url_type": null | 133 | "url_type": null | ||
134 | } | 134 | } | ||
135 | ], | 135 | ], | ||
136 | "state": "active", | 136 | "state": "active", | ||
137 | "tags": [ | 137 | "tags": [ | ||
138 | { | 138 | { | ||
139 | "display_name": "parallel text", | 139 | "display_name": "parallel text", | ||
140 | "id": "896bbff4-e82d-4f13-9668-dd08a37eb81c", | 140 | "id": "896bbff4-e82d-4f13-9668-dd08a37eb81c", | ||
141 | "name": "parallel text", | 141 | "name": "parallel text", | ||
142 | "state": "active", | 142 | "state": "active", | ||
143 | "vocabulary_id": null | 143 | "vocabulary_id": null | ||
144 | } | 144 | } | ||
145 | ], | 145 | ], | ||
t | 146 | "title": "Synthetic parallel corpora LAD-EN, TR", | t | 146 | "title": "Synthetic parallel corpora Ladino-English, Turkish, |
147 | Spanish", | ||||
147 | "type": "dataset", | 148 | "type": "dataset", | ||
148 | "url": "", | 149 | "url": "", | ||
149 | "version": "" | 150 | "version": "" | ||
150 | } | 151 | } |