return rc;
}
EXPORT_SYMBOL(nd_pfn_probe);
- if (nd_pfn->mode == PFN_MODE_PMEM)
- offset = ALIGN(start + SZ_8K + 64 * npfns + dax_label_reserve,
+
+ /*
+ * We hotplug memory at section granularity, pad the reserved area from
+ * the previous section base to the namespace base address.
+ */
+ static unsigned long init_altmap_base(resource_size_t base)
+ {
+ unsigned long base_pfn = PHYS_PFN(base);
+
+ return PFN_SECTION_ALIGN_DOWN(base_pfn);
+ }
+
+ static unsigned long init_altmap_reserve(resource_size_t base)
+ {
+ unsigned long reserve = PHYS_PFN(SZ_8K);
+ unsigned long base_pfn = PHYS_PFN(base);
+
+ reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn);
+ return reserve;
+ }
+
+ static struct vmem_altmap *__nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
+ struct resource *res, struct vmem_altmap *altmap)
+ {
+ struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
+ u64 offset = le64_to_cpu(pfn_sb->dataoff);
+ u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
+ u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
+ struct nd_namespace_common *ndns = nd_pfn->ndns;
+ struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
+ resource_size_t base = nsio->res.start + start_pad;
+ struct vmem_altmap __altmap = {
+ .base_pfn = init_altmap_base(base),
+ .reserve = init_altmap_reserve(base),
+ };
+
+ memcpy(res, &nsio->res, sizeof(*res));
+ res->start += start_pad;
+ res->end -= end_trunc;
+
+ nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
+ if (nd_pfn->mode == PFN_MODE_RAM) {
+ if (offset < SZ_8K)
+ return ERR_PTR(-EINVAL);
+ nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
+ altmap = NULL;
+ } else if (nd_pfn->mode == PFN_MODE_PMEM) {
+ nd_pfn->npfns = (resource_size(res) - offset) / PAGE_SIZE;
+ if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
+ dev_info(&nd_pfn->dev,
+ "number of pfns truncated from %lld to %ld\n",
+ le64_to_cpu(nd_pfn->pfn_sb->npfns),
+ nd_pfn->npfns);
+ memcpy(altmap, &__altmap, sizeof(*altmap));
+ altmap->free = PHYS_PFN(offset - SZ_8K);
+ altmap->alloc = 0;
+ } else
+ return ERR_PTR(-ENXIO);
+
+ return altmap;
+ }
+
+ static int nd_pfn_init(struct nd_pfn *nd_pfn)
+ {
+ u32 dax_label_reserve = is_nd_dax(&nd_pfn->dev) ? SZ_128K : 0;
+ struct nd_namespace_common *ndns = nd_pfn->ndns;
+ u32 start_pad = 0, end_trunc = 0;
+ resource_size_t start, size;
+ struct nd_namespace_io *nsio;
+ struct nd_region *nd_region;
+ struct nd_pfn_sb *pfn_sb;
+ unsigned long npfns;
+ phys_addr_t offset;
+ u64 checksum;
+ int rc;
+
+ pfn_sb = devm_kzalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL);
+ if (!pfn_sb)
+ return -ENOMEM;
+
+ nd_pfn->pfn_sb = pfn_sb;
+ rc = nd_pfn_validate(nd_pfn);
+ if (rc != -ENODEV)
+ return rc;
+
+ /* no info block, do init */;
+ nd_region = to_nd_region(nd_pfn->dev.parent);
+ if (nd_region->ro) {
+ dev_info(&nd_pfn->dev,
+ "%s is read-only, unable to init metadata\n",
+ dev_name(&nd_region->dev));
+ return -ENXIO;
+ }
+
+ memset(pfn_sb, 0, sizeof(*pfn_sb));
+
+ /*
+ * Check if pmem collides with 'System RAM' when section aligned and
+ * trim it accordingly
+ */
+ nsio = to_nd_namespace_io(&ndns->dev);
+ start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start);
+ size = resource_size(&nsio->res);
+ if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
+ IORES_DESC_NONE) == REGION_MIXED) {
+ start = nsio->res.start;
+ start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
+ }
+
+ start = nsio->res.start;
+ size = PHYS_SECTION_ALIGN_UP(start + size) - start;
+ if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
+ IORES_DESC_NONE) == REGION_MIXED) {
+ size = resource_size(&nsio->res);
+ end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
+ }
+
+ if (start_pad + end_trunc)
+ dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
+ dev_name(&ndns->dev), start_pad + end_trunc);
+
+ /*
+ * Note, we use 64 here for the standard size of struct page,
+ * debugging options may cause it to be larger in which case the
+ * implementation will limit the pfns advertised through
+ * ->direct_access() to those that are included in the memmap.
+ */
+ start += start_pad;
+ size = resource_size(&nsio->res);
+ npfns = (size - start_pad - end_trunc - SZ_8K) / SZ_4K;
- else if (nd_pfn->mode == PFN_MODE_RAM)
++ if (nd_pfn->mode == PFN_MODE_PMEM) {
++ unsigned long memmap_size;
++
++ /*
++ * vmemmap_populate_hugepages() allocates the memmap array in
++ * HPAGE_SIZE chunks.
++ */
++ memmap_size = ALIGN(64 * npfns, HPAGE_SIZE);
++ offset = ALIGN(start + SZ_8K + memmap_size + dax_label_reserve,
+ nd_pfn->align) - start;
++ } else if (nd_pfn->mode == PFN_MODE_RAM)
+ offset = ALIGN(start + SZ_8K + dax_label_reserve,
+ nd_pfn->align) - start;
+ else
+ return -ENXIO;
+
+ if (offset + start_pad + end_trunc >= size) {
+ dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
+ dev_name(&ndns->dev));
+ return -ENXIO;
+ }
+
+ npfns = (size - offset - start_pad - end_trunc) / SZ_4K;
+ pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
+ pfn_sb->dataoff = cpu_to_le64(offset);
+ pfn_sb->npfns = cpu_to_le64(npfns);
+ memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN);
+ memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
+ memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
+ pfn_sb->version_major = cpu_to_le16(1);
+ pfn_sb->version_minor = cpu_to_le16(2);
+ pfn_sb->start_pad = cpu_to_le32(start_pad);
+ pfn_sb->end_trunc = cpu_to_le32(end_trunc);
+ pfn_sb->align = cpu_to_le32(nd_pfn->align);
+ checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
+ pfn_sb->checksum = cpu_to_le64(checksum);
+
+ return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb));
+ }
+
+ /*
+ * Determine the effective resource range and vmem_altmap from an nd_pfn
+ * instance.
+ */
+ struct vmem_altmap *nvdimm_setup_pfn(struct nd_pfn *nd_pfn,
+ struct resource *res, struct vmem_altmap *altmap)
+ {
+ int rc;
+
+ if (!nd_pfn->uuid || !nd_pfn->ndns)
+ return ERR_PTR(-ENODEV);
+
+ rc = nd_pfn_init(nd_pfn);
+ if (rc)
+ return ERR_PTR(rc);
+
+ /* we need a valid pfn_sb before we can init a vmem_altmap */
+ return __nvdimm_setup_pfn(nd_pfn, res, altmap);
+ }
+ EXPORT_SYMBOL_GPL(nvdimm_setup_pfn);